In [None]:
# Importing all libraries required in this notebook
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

In [None]:
# Load the student hours/scores CSV straight from GitHub, then show its
# dimensions followed by per-column summary statistics.
DATA_URL = 'https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/student_scores%20-%20student_scores.csv'
df = pd.read_csv(DATA_URL)
print(df.shape, df.describe(), sep='\n')


In [None]:
# Peek at the first five rows (the pandas default for head()).
print(df.head(n=5))

In [None]:
# Scatter of study hours against the actual score; marker colour and size
# both follow the score so higher marks stand out.
fig = px.scatter(
    df,
    x="Hours",
    y="Scores",
    color="Scores",
    size="Scores",
    hover_data=["Scores"],
    title="Relationship between Marks and Study time (Actual)",
)
fig.show()

In [None]:
# Preparing the data
# Target vector: one score per row.
y = np.array(df['Scores'])
# Feature matrix: the single 'Hours' feature reshaped to one row per sample,
# which is the 2-D layout the estimator's fit/predict calls below are given.
X = np.array(df['Hours']).reshape(len(df), -1)


In [None]:
# Training the algorithm
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle deterministic, so the split, the fitted coefficients and the score
# printed below are identical on every Restart & Run All (without it each run
# drew a different split and reported a different number).
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
lr = LinearRegression()
lr.fit(x_train, y_train)
# NOTE: LinearRegression.score returns the coefficient of determination (R^2)
# on the held-out rows; it is not a probability or confidence level.
lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)

In [None]:
# Overlay the fitted regression line on the full dataset (all of X and y,
# not only the held-out test rows).
line = lr.intercept_ + lr.coef_ * X
plt.scatter(X, y)
plt.plot(X, line)
plt.show()

In [None]:
# Predict a score for every row's Hours value. X holds the full dataset, so
# this covers the rows used for training as well as the held-out test rows.
lr_prediction = lr.predict(X)

In [None]:
# Store the model output next to the actual scores as a "Prediction" column.
# This mutates df in place; re-running the cell simply overwrites the column.
df['Prediction'] = lr_prediction

In [None]:
# Summary statistics again: df now also carries the 'Prediction' column added
# above, so its statistics can be compared with those of 'Scores'.
print(df.describe())

In [None]:

# Scatter of study hours against the predicted score; colour and marker size
# follow the prediction, and the actual score is shown on hover.
fig = px.scatter(
    df,
    x="Hours",
    y="Prediction",
    color="Prediction",
    size="Prediction",
    hover_data=["Scores"],
    title="Plot of predicted Scores",
)
fig.show()

In [None]:
# First ten rows, with actual and predicted scores side by side.
print(df.head(n=10))

In [None]:
# Actual vs predicted scores as grouped bars, one pair per Hours value.
fig = go.Figure(data=[
    go.Bar(
        x=df['Hours'],
        y=df['Scores'],
        name='Actual Scores',
        marker_color='green',
    ),
    go.Bar(
        x=df['Hours'],
        y=df['Prediction'],
        name='Prediction',
        marker_color='red',
    ),
])
# Titles and the grouped (side-by-side) bar mode are applied in a single
# layout update.
fig.update_layout(
    title='Comparision between Actual & Predicted Scores (Bar Plot)',
    yaxis_title='Scores in percentage',
    xaxis_title='Study time in Hours',
    barmode='group',
)
fig.show()

In [None]:
# Actual vs predicted scores as two marker-only scatter traces on shared axes.
fig = go.Figure(data=[
    go.Scatter(
        x=df['Hours'],
        y=df['Scores'],
        name='Actual Scores',
        marker_color='green',
        mode='markers',
    ),
    go.Scatter(
        x=df['Hours'],
        y=df['Prediction'],
        name='Predicted Scores',
        marker_color='Red',
        mode='markers',
    ),
])
fig.update_layout(
    title='Comparision between Actual & Predicted Scores (Scatter Plot)',
    yaxis_title='Scores in percentage',
    xaxis_title='Study time in Hours',
)
fig.show()

In [None]:
# Prediction for a user-supplied study time.
# NOTE: input() blocks until the user types a value, so this cell pauses an
# unattended "Run All".
raw_hours = input("Enter study time in hours :")

# Parse defensively: empty or non-numeric text makes float() raise ValueError,
# which previously escaped from the numpy conversion as a raw traceback.
try:
    hours = float(raw_hours)
except ValueError:
    hours = None

if hours is None or not np.isfinite(hours) or hours < 0:
    # Also reject NaN/inf and negative durations, which float() accepts but
    # which are not meaningful study times.
    print('Invalid study time:', repr(raw_hours),
          '- expected a non-negative number of hours')
else:
    # The model is given a 2-D array: one sample with one feature.
    test = np.array([[hours]], dtype='float64')
    print('Predicted mark in % ', lr.predict(test))