## Import Packages and Load Data

In [None]:
# Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
import datetime

In [None]:
# Read the Netflix stock price dataset from disk into a DataFrame
csv_path = 'Netflix-Stock-Price-Prediction-dataset.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to sanity-check the columns that were loaded
df.head(n=5)

## Perform necessary EDA and Preprocessing

In [None]:
# Parse the 'Date' strings into timestamps, then use them as the row index
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates
df.set_index(keys='Date', inplace=True)

In [None]:
# Dataset information: print a concise summary of the DataFrame (index,
# columns, non-null counts, dtypes and memory usage) so that type or
# missing-value problems surface before any modeling is done
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per numeric column
summary_stats = df.describe()
summary_stats

In [None]:
# Line chart of the closing price over the dataset's date index
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['Close'], label='Close Price')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.set_title('Closing Price Over Time')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Count the missing entries in every column
df.isna().sum()

In [None]:
# Handle missing values (if any) by carrying the last valid observation forward.
# `DataFrame.ffill` is used instead of `fillna(method='ffill')` because the
# `method` keyword of `fillna` is deprecated since pandas 2.1 and emits a
# FutureWarning.
# NOTE: a gap in the very first rows has no earlier value to copy and stays NaN.
df.ffill(inplace=True)

In [None]:
# Derive calendar features (year, month, day of month) from the datetime index
for column, attribute in (('Year', 'year'), ('Month', 'month'), ('Day', 'day')):
    df[column] = getattr(df.index, attribute)

## Prepare the dataset for modeling

In [None]:
# Model inputs: the same-day price/volume columns plus the calendar features.
# Model output: the closing price.
feature_columns = ['Open', 'High', 'Low', 'Volume', 'Year', 'Month', 'Day']
target_column = 'Close'
X = df[feature_columns]
y = df[target_column]

In [None]:
# Split the dataset into 80/20 Training and Testing.
# The rows are a daily price series, so the hold-out is chronological: the
# oldest 80% of dates train the model and the newest 20% evaluate it. A
# shuffled split would train on dates that come after some test dates
# (look-ahead leakage) and would leave the test index out of date order.
# The stable argsort applies one shared ordering to X and y, so rows stay
# aligned even if a date appears more than once.
chronological = np.argsort(X.index.values, kind='stable')
X_train, X_test, y_train, y_test = train_test_split(
    X.iloc[chronological],
    y.iloc[chronological],
    test_size=0.2,
    shuffle=False,  # keep date order; random_state has no effect without shuffling
)

In [None]:
# Standardize the features: learn the per-column mean and standard deviation
# from the training rows only, then apply that same scaling to both subsets
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Generate Model

In [None]:
# Fit an ordinary least-squares linear regression on the scaled training data
model = LinearRegression()
model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Generate closing-price predictions for the scaled held-out test rows
y_pred = model.predict(X=X_test_scaled)

## Evaluate the Model

In [None]:
# Score the predictions against the held-out targets with three metrics:
# Mean Squared Error (MSE), Mean Absolute Error (MAE) and R-squared (R2)
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report each metric on its own line
for label, value in (
    ('Mean Squared Error', mse),
    ('Mean Absolute Error', mae),
    ('R-squared', r2),
):
    print(f'{label}: {value}')

## Visualize the actual vs predicted prices

In [None]:
# Plot actual vs. predicted closing prices for the test set.
# A line plot joins points in the order they are given, and the test rows are
# not guaranteed to be in date order (a shuffled split scatters them), so both
# series are ordered by date first. Otherwise the chart is a zig-zag of
# criss-crossing segments rather than two readable curves.
date_order = np.argsort(y_test.index.values, kind='stable')
test_dates = y_test.index[date_order]

plt.figure(figsize=(10, 6))
plt.plot(test_dates, y_test.to_numpy()[date_order], label='Actual Prices')
plt.plot(test_dates, np.asarray(y_pred)[date_order], label='Predicted Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Actual vs Predicted Prices')
plt.legend()
plt.grid(True)
plt.show()

## Final output

In [None]:
# Put the actual and predicted closing prices side by side, keyed by the
# test rows' date index, and preview the first five rows
result_df = y_test.rename('Actual').to_frame().assign(Predicted=y_pred)
result_df.head(n=5)

In [None]:
# Write the comparison table to CSV, keeping the date index as the first column
output_path = 'netflix_stock_prediction_results.csv'
result_df.to_csv(output_path, index=True)