In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Read the data
nflx_data = pd.read_csv('Download Data - STOCK_US_XNAS_NFLX.csv')
print(nflx_data)

# Check for any missing values
missing_values = nflx_data.isnull().sum()
print(missing_values)

# 'Volume' is read as text with thousands separators (e.g. "7,840,992").
# Convert it to float BEFORE computing any statistics so it is treated as a
# numerical feature in the scale check below. The dtype guard keeps this step
# safe when the column is already numeric (the .str accessor would raise).
volume = nflx_data['Volume']
if not pd.api.types.is_numeric_dtype(volume):
    volume = volume.str.replace(',', '', regex=False)
nflx_data['Volume'] = volume.astype(float)

# iii) Check whether numerical features have the same scale
numerical_columns = nflx_data.select_dtypes(include=[np.number]).columns
numeric_data = nflx_data[numerical_columns]
print("\nRange of Numerical Features:")
print(numeric_data.max() - numeric_data.min())

# Calculate mean and standard deviation
mean_values = numeric_data.mean()
std_values = numeric_data.std()

print("\nMean of Numerical Features:")
print(mean_values)

print("\nStandard Deviation of Numerical Features:")
print(std_values)
num_rows = nflx_data.shape[0]

# Find the highest and lowest values for each numerical column.
# Restricted to numeric columns: max()/min() on the text 'Date' column would
# compare strings lexicographically, which is not a meaningful extreme.
max_values = numeric_data.max()
min_values = numeric_data.min()

print("\nHighest Prices for Each Column:")
print(max_values)

print("\nLowest Prices for Each Column:")
print(min_values)

print("\nNumber of Rows:", num_rows)

# Selecting features and target variable
X = nflx_data[['Open', 'High', 'Low', 'Volume']]
y = nflx_data['Close']

# Splitting the data into training and testing sets
# NOTE(review): train_test_split shuffles rows by default, so the test set is
# a random sample of days rather than the most recent ones — confirm this is
# the intended evaluation for date-ordered data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Standardize the numerical features.
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Training
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Predicting on the test set
y_pred = model.predict(X_test_scaled)

# Model Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nMean Squared Error:", mse)
print("R-squared:", r2)

          Date    Open    High     Low   Close     Volume
0   12/15/2023  467.30  473.00  467.30  472.06  7,840,992
1   12/14/2023  480.36  480.76  464.76  469.83  5,176,437
2   12/13/2023  461.98  481.25  461.98  479.98  5,820,192
3   12/12/2023  465.23  465.61  459.20  463.00  3,302,660
4   12/11/2023  459.36  470.65  457.21  459.89  4,929,662
5   12/08/2023  450.76  455.50  450.76  453.76  3,458,384
6   12/07/2023  450.85  452.89  448.32  452.00  3,506,722
7   12/06/2023  460.00  460.50  445.73  446.73  4,178,755
8   12/05/2023  450.70  456.39  449.58  455.15  3,380,674
9   12/04/2023  460.99  461.20  451.20  453.90  5,157,713
10  12/01/2023  473.17  475.23  464.60  465.74  4,341,471
11  11/30/2023  475.31  478.59  470.42  473.97  4,287,349
12  11/29/2023  479.00  480.99  474.49  477.19  2,855,540
13  11/28/2023  478.11  480.50  475.95  479.00  2,890,164
14  11/27/2023  479.03  482.00  475.35  479.17  3,625,924
15  11/24/2023  477.11  480.40  475.20  479.56  1,404,732
16  11/22/2023