In [46]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [47]:
df = pd.read_csv('/content/Student_Performance.csv')
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [48]:
df['Extracurricular Activities'] = df['Extracurricular Activities'].map({'Yes': 1, 'No': 0})

# Verify the conversion
print(df)

      Hours Studied  Previous Scores  Extracurricular Activities  Sleep Hours  \
0                 7               99                           1            9   
1                 4               82                           0            4   
2                 8               51                           1            7   
3                 5               52                           1            5   
4                 7               75                           0            8   
...             ...              ...                         ...          ...   
9995              1               49                           1            4   
9996              7               64                           1            8   
9997              6               83                           1            8   
9998              9               97                           1            7   
9999              7               74                           0            8   

      Sample Question Paper

In [49]:
df.shape

(10000, 6)

In [50]:
# Creating a 3D scatter plot
fig = px.scatter_3d(df, x='Hours Studied', y='Previous Scores', z='Performance Index', title='3D Scatter Plot of Performance Index')

# Display the plot
fig.show()

In [51]:
X=df.drop(columns=['Performance Index'])
y=df['Performance Index']

In [52]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=3)

In [53]:
from sklearn.linear_model import LinearRegression

In [54]:
lr = LinearRegression()

In [55]:
lr.fit(X_train,y_train)

In [56]:
y_pred = lr.predict(X_test)

In [30]:
print("MAE",mean_absolute_error(y_test,y_pred))
print("MSE",mean_squared_error(y_test,y_pred))
print("R2 score",r2_score(y_test,y_pred))

MAE 1.6341408964461508
MSE 4.234790578228672
R2 score 0.9886212862512649


In [37]:
# Create meshgrid for prediction
x1 = np.linspace(df['Hours Studied'].min(), df['Hours Studied'].max(), 10)
x2 = np.linspace(df['Previous Scores'].min(), df['Previous Scores'].max(), 10)
xGrid1, xGrid2 = np.meshgrid(x1, x2)

# Prepare dummy features for prediction
x3_dummy = np.mean(df['Extracurricular Activities']) * np.ones_like(xGrid1)
x4_dummy = np.mean(df['Sleep Hours']) * np.ones_like(xGrid1)
x5_dummy = np.mean(df['Sample Question Papers Practiced']) * np.ones_like(xGrid1)

# Stack all features
final = np.vstack((xGrid1.ravel(), xGrid2.ravel(), x3_dummy.ravel(), x4_dummy.ravel(), x5_dummy.ravel())).T

# Make predictions
z_final = lr.predict(final).reshape(xGrid1.shape)

# Scatter plot of the original data
fig = px.scatter_3d(df, x='Hours Studied', y='Previous Scores', z='Performance Index',
                    labels={'Hours Studied': 'Hours Studied', 'Previous Scores': 'Previous Scores', 'Performance Index': 'Performance Index'})

# Add surface plot with color customization
fig.add_trace(go.Surface(x=xGrid1, y=xGrid2, z=z_final, colorscale='Viridis', opacity=0.8, showscale=True, colorbar_title='Performance Index'))

# Update layout for better visualization
fig.update_layout(title='3D Scatter and Surface Plot',
                  scene=dict(xaxis_title='Hours Studied',
                             yaxis_title='Previous Scores',
                             zaxis_title='Performance Index'))

fig.show()


X does not have valid feature names, but LinearRegression was fitted with feature names



In [38]:
lr.coef_

array([2.85634975, 1.01868664, 0.58220594, 0.4786812 , 0.19565205])

In [39]:
lr.intercept_

-34.08751184565898

In [43]:
single_row = X_test.iloc[0].values.reshape(1, -1)
prediction = lr.predict(single_row)


X does not have valid feature names, but LinearRegression was fitted with feature names



In [44]:
prediction

array([74.82509861])

In [45]:
y_test.iloc[0]

73.0