In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from time import time
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import confusion_matrix, roc_curve, accuracy_score, f1_score, roc_auc_score, classification_report
from astropy.table import Table
from sklearn.metrics import roc_auc_score

In [2]:
from ucimlrepo import fetch_ucirepo

# Download the dataset registered under id 320 in the UCI ML repository.
student_performance = fetch_ucirepo(id=320)

# Feature table and target table (pandas DataFrames), unpacked in one step.
X, y = student_performance.data.features, student_performance.data.targets

# Dump the dataset-level metadata first, then the per-variable description,
# one after the other on separate lines.
print(student_performance.metadata, student_performance.variables, sep="\n")

{'uci_id': 320, 'name': 'Student Performance', 'repository_url': 'https://archive.ics.uci.edu/dataset/320/student+performance', 'data_url': 'https://archive.ics.uci.edu/static/public/320/data.csv', 'abstract': 'Predict student performance in secondary education (high school). ', 'area': 'Social Science', 'tasks': ['Classification', 'Regression'], 'characteristics': ['Multivariate'], 'num_instances': 649, 'num_features': 30, 'feature_types': ['Integer'], 'demographics': ['Sex', 'Age', 'Other', 'Education Level', 'Occupation'], 'target_col': ['G1', 'G2', 'G3'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2008, 'last_updated': 'Fri Jan 05 2024', 'dataset_doi': '10.24432/C5TG7T', 'creators': ['Paulo Cortez'], 'intro_paper': {'title': 'Using data mining to predict secondary school student performance', 'authors': 'P. Cortez, A. M. G. Silva', 'published_in': 'Proceedings of 5th Annual Future Business Technology Conference', 'year'

In [3]:
  
# Single predictor: the 'studytime' column, kept two-dimensional (one-column
# DataFrame) because the column label is passed inside a list.
x = student_performance.data.features.loc[:, ['studytime']]
# Target: the 'G3' column, likewise kept as a one-column DataFrame.
y = student_performance.data.targets.loc[:, ['G3']]

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
from sklearn.linear_model import LinearRegression
# Unfitted linear-regression estimator created with scikit-learn's default
# settings (no constructor arguments); it is fitted in a later cell.
model = LinearRegression()

In [6]:
# Fit the regression on the training split. Left as a bare expression so the
# notebook still displays the fitted estimator as the cell result.
model.fit(X=x_train, y=y_train)

In [7]:
# Predicted target values for the held-out rows.
y_pred = model.predict(X=x_test)

In [8]:
from sklearn.metrics import mean_squared_error

# Mean of the squared differences between the held-out targets and the
# model's predictions for the same rows.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 9.38487192267022


In [9]:
# Square root of the mean squared error computed in the previous cell.
rmse = pow(mse, 0.5)
print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 3.063473832542106


In [10]:
# Show the predictions made for the held-out rows.
# NOTE(review): this prints the whole prediction array, one bracketed value
# per row; consider printing only a slice (e.g. y_pred[:5]) to keep the
# notebook output short.
print("Predictions", y_pred)

Predictions [[11.89138102]
 [11.89138102]
 [11.89138102]
 [11.89138102]
 [11.89138102]
 [12.88406238]
 [11.89138102]
 [11.89138102]
 [11.89138102]
 [11.89138102]
 [13.87674374]
 [10.89869966]
 [12.88406238]
 [12.88406238]
 [12.88406238]
 [11.89138102]
 [11.89138102]
 [12.88406238]
 [13.87674374]
 [11.89138102]
 [10.89869966]
 [12.88406238]
 [11.89138102]
 [10.89869966]
 [11.89138102]
 [10.89869966]
 [10.89869966]
 [13.87674374]
 [12.88406238]
 [11.89138102]
 [12.88406238]
 [11.89138102]
 [10.89869966]
 [11.89138102]
 [11.89138102]
 [11.89138102]
 [10.89869966]
 [12.88406238]
 [10.89869966]
 [11.89138102]
 [12.88406238]
 [11.89138102]
 [11.89138102]
 [13.87674374]
 [11.89138102]
 [11.89138102]
 [12.88406238]
 [11.89138102]
 [12.88406238]
 [10.89869966]
 [12.88406238]
 [12.88406238]
 [10.89869966]
 [10.89869966]
 [11.89138102]
 [13.87674374]
 [10.89869966]
 [12.88406238]
 [11.89138102]
 [10.89869966]
 [10.89869966]
 [11.89138102]
 [12.88406238]
 [11.89138102]
 [10.89869966]
 [11.89138102

In [21]:
# studytime category codes:
#   1: < 2 hours
#   2: 2 to 5 hours
#   3: 5 to 10 hours
#   4: > 10 hours
# Build the query as a DataFrame that carries the training column names: the
# model was fitted on a DataFrame, and passing a bare ndarray makes
# scikit-learn warn that the input does not have valid feature names.
new_data = pd.DataFrame([[1]], columns=x_train.columns)
new_prediction = model.predict(new_data)
# new_prediction[0] is a one-element row, so it prints inside brackets.
print(f"Prediction for LESS than 2 hours of study time: {new_prediction[0]}")

Prediction for LESS than 2 hours of study time: [10.89869966]




In [22]:
# studytime category codes:
#   1: < 2 hours
#   2: 2 to 5 hours
#   3: 5 to 10 hours
#   4: > 10 hours
# Build the query as a DataFrame that carries the training column names: the
# model was fitted on a DataFrame, and passing a bare ndarray makes
# scikit-learn warn that the input does not have valid feature names.
new_data = pd.DataFrame([[2]], columns=x_train.columns)
new_prediction = model.predict(new_data)
# new_prediction[0] is a one-element row, so it prints inside brackets.
print(f"Prediction for 2 to 5 hours of study time: {new_prediction[0]}")

Prediction for 2 to 5 hours of study time: [11.89138102]




In [23]:
# studytime category codes:
#   1: < 2 hours
#   2: 2 to 5 hours
#   3: 5 to 10 hours
#   4: > 10 hours
# Build the query as a DataFrame that carries the training column names: the
# model was fitted on a DataFrame, and passing a bare ndarray makes
# scikit-learn warn that the input does not have valid feature names.
new_data = pd.DataFrame([[3]], columns=x_train.columns)
new_prediction = model.predict(new_data)
# new_prediction[0] is a one-element row, so it prints inside brackets.
print(f"Prediction for 5 to 10 hours of study time: {new_prediction[0]}")

Prediction for 5 to 10 hours of study time: [12.88406238]




In [24]:
# studytime category codes:
#   1: < 2 hours
#   2: 2 to 5 hours
#   3: 5 to 10 hours
#   4: > 10 hours
# Build the query as a DataFrame that carries the training column names: the
# model was fitted on a DataFrame, and passing a bare ndarray makes
# scikit-learn warn that the input does not have valid feature names.
new_data = pd.DataFrame([[4]], columns=x_train.columns)
new_prediction = model.predict(new_data)
# new_prediction[0] is a one-element row, so it prints inside brackets.
print(f"Prediction for more than 10 hours of study time: {new_prediction[0]}")

Prediction for more than 10 hours of study time: [13.87674374]


