In [2]:
import numpy as np
import pandas as pd
from datetime import datetime

# Load the price history; the path is resolved relative to the working directory.
df = pd.read_csv(r"NFLX.csv")
df.info()

# Convert the 'Date' column to datetime.
# pd.to_datetime parses the whole column in one vectorized call and maps
# missing values to NaT, whereas a per-row datetime.strptime raises TypeError
# on the first non-string cell.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.info()

# Drop the 'Adj Close' column; it is not used by the later cells.
df = df.drop(columns=['Adj Close'])
print(df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1009 non-null   object 
 1   Open       1009 non-null   float64
 2   High       1009 non-null   float64
 3   Low        1009 non-null   float64
 4   Close      1009 non-null   float64
 5   Adj Close  1009 non-null   float64
 6   Volume     1009 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 55.3+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       1009 non-null   datetime64[ns]
 1   Open       1009 non-null   float64       
 2   High       1009 non-null   float64       
 3   Low        1009 non-null   float64       
 4   Close      1009 non-null   float64       
 5   Adj Close  1009 non-null   floa

In [3]:
# Split the data into training and testing sets by calendar year:
# rows dated before 2021 are used for training, rows from 2021 onward for testing.
# The year is extracted once and both masks are kept explicit (rather than
# negating one) so a row with a missing date lands in neither set.
year = df['Date'].dt.year
train_mask = year < 2021
test_mask = year >= 2021

# Features are every column except the target ('Close') and 'Date', which is
# only needed to build the masks above. Dropping both up front avoids calling
# drop(..., inplace=True) on filtered frames.
features = df.drop(columns=['Close', 'Date'])
x_train = features[train_mask]
x_test = features[test_mask]

# Targets as plain NumPy arrays, row-aligned with x_train / x_test.
y_train = df.loc[train_mask, 'Close'].values
y_test = df.loc[test_mask, 'Close'].values

print(x_train.head())
print(x_test.head())


         Open        High         Low    Volume
0  262.000000  267.899994  250.029999  11896100
1  247.699997  266.700012  245.000000  12595800
2  266.579987  272.450012  264.329987   8981500
3  267.079987  267.619995  250.000000   9306700
4  253.850006  255.800003  236.110001  16906900
           Open        High         Low   Volume
733  539.000000  540.799988  515.090027  4444400
734  521.549988  526.780029  515.890015  3133900
735  511.970001  513.099976  499.500000  5346200
736  508.279999  516.440002  506.420013  3686400
737  511.309998  513.239990  504.510010  2973900


In [90]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse

# Fit the Linear Regression model on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained).
linear_regressor = LinearRegression().fit(x_train, y_train)

# Predict closing prices for the held-out split
y_pred_lr = linear_regressor.predict(x_test)

# Gather every reported figure first, then print them in one pass.
test_mae = mae(y_test, y_pred_lr)
test_mse = mse(y_test, y_pred_lr)
evaluation_report = (
    ("Mean Absolute Error (MAE): {}", test_mae),
    ("Mean Squared Error (MSE): {}", test_mse),
    ("Root Mean Squared Error (RMSE): {}", np.sqrt(test_mse)),
    # score() is the estimator's built-in goodness-of-fit measure
    # (R^2 for scikit-learn regressors).
    ("Test Score: {}", linear_regressor.score(x_test, y_test)),
    ("Train Score: {}", linear_regressor.score(x_train, y_train)),
)

print("----------Linear Regression Model Evaluation-----------")
for template, value in evaluation_report:
    print(template.format(value))


----------Linear Regression Model Evaluation-----------
Mean Absolute Error (MAE): 3.1132978579900707
Mean Squared Error (MSE): 17.036128617517225
Root Mean Squared Error (RMSE): 4.127484538737513
Test Score: 0.9955930255807364
Train Score: 0.9974448683406483


In [92]:
import pickle

# Serialize the fitted regressor to 'NetflixPred.pkl' in the working directory
# (binary mode is required by pickle).
with open('NetflixPred.pkl', 'wb') as model_file:
    pickle.dump(linear_regressor, model_file)

# Show which estimator class was just written out.
model_class = type(linear_regressor)
print(model_class)


<class 'sklearn.linear_model._base.LinearRegression'>


In [91]:
def predict_close(Open=50.0, High=100.0, Low=0.0, Volume=4444400):
    """Predict a closing value from a single day's open, high, low and volume.

    The four arguments are placed into a one-row DataFrame whose columns are
    named 'Open', 'High', 'Low', 'Volume' (in that order) and passed to the
    module-level ``linear_regressor``.

    Returns:
        The first (and only) element of the model's prediction.
    """
    # Dict insertion order fixes the column order handed to the model.
    single_row = pd.DataFrame(
        {'Open': [Open], 'High': [High], 'Low': [Low], 'Volume': [Volume]}
    )
    prediction = linear_regressor.predict(single_row)
    return prediction[0]

# Read one day's figures from the user.
# Note the prompt order is open, low, high, volume.
opening_price = float(input("Enter the opening value of the stock: "))
low = float(input("Enter the lowest value of the stock: "))
high = float(input("Enter the highest value of the stock: "))
volume = int(input("Enter the volume of the stock available: "))

# Predict the closing value; keyword arguments make the mapping explicit,
# since predict_close takes High before Low while the prompts ask low first.
closing_value = predict_close(
    Open=opening_price,
    High=high,
    Low=low,
    Volume=volume,
)
rounded_close = closing_value.round(2)
print('Closing value of the stock is', rounded_close)


Closing value of the stock is 0.65
