In [None]:
# Ask the user to upload the dataset through Colab's file helper; the value
# returned by files.upload() is kept in `uploaded`.
from google.colab import files
uploaded = files.upload()

Saving cleaned_stock_prices.csv to cleaned_stock_prices.csv


In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed
# Read the CSV saved by the upload cell into a DataFrame.
df = pd.read_csv("cleaned_stock_prices.csv")
# Name of the column used as the regression target further down.
target = "Price"
def convert_volume(value):
    """Convert a human-readable volume string into a number.

    Thousands separators (",") and surrounding whitespace are removed
    first. A trailing "K", "M" or "B" (upper or lower case) scales the
    number by 1e3, 1e6 or 1e9 respectively. The placeholders "-" and ""
    are mapped to 0.

    Args:
        value: Raw cell value. Anything that is not a ``str`` is passed
            through untouched.

    Returns:
        A ``float`` for numeric strings, ``0`` for the "-" / "" placeholders,
        or ``value`` itself when it is not a string.

    Raises:
        ValueError: If the string (after removing a recognised suffix) is
            not a valid float literal.
    """
    if not isinstance(value, str):
        return value

    text = value.replace(",", "").strip()
    # Placeholders must be handled before looking at the last character,
    # otherwise an empty string would fail on text[-1].
    if text in ("", "-"):
        return 0

    # Integer multipliers keep the arithmetic identical to the previous
    # float(...) * 1_000_000 / 1_000 expressions.
    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
    scale = multipliers.get(text[-1].upper())
    if scale is not None:
        return float(text[:-1]) * scale
    return float(text)
# Run convert_volume over every entry of the "Vol." column, but only when the
# loaded frame actually has that column.
if "Vol." in df.columns:
    df["Vol."] = df["Vol."].apply(convert_volume)
def clean_percentage(x):
    """Turn a percentage string such as "1.5%" into a fraction (0.015).

    Every "%" character and any surrounding whitespace are removed, the
    remainder is parsed as a float and divided by 100. Values that are not
    strings are returned unchanged.
    """
    if not isinstance(x, str):
        return x
    number_text = x.replace("%", "").strip()
    return float(number_text) / 100
# Run clean_percentage over every entry of the "Change %" column, but only
# when the loaded frame actually has that column.
if "Change %" in df.columns:
    df["Change %"] = df["Change %"].apply(clean_percentage)
# --- Feature / target preparation --------------------------------------------
# Remove the date column before modelling. errors="ignore" makes this a no-op
# when the column is absent, in line with the guarded handling of the other
# optional columns ("Vol.", "Change %"), instead of raising KeyError.
df = df.drop(columns=["Date"], errors="ignore")

# Everything except the target column is used as a feature.
X = df.drop(columns=[target])
y = df[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable.
# NOTE(review): train_test_split shuffles rows by default, so this is a random
# rather than chronological split -- confirm that is intended for price data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so no test-set information leaks into the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# --- Neural-network regressor ------------------------------------------------
# Seed the random number generators so the ANN run is repeatable, matching the
# fixed random_state=42 used for the data split and the random forest.
set_random_seed(42)

# Two hidden ReLU layers followed by a single linear output unit.
# The input width is declared with an explicit Input layer rather than the
# `input_dim` argument on Dense; the truncated warning in this cell's recorded
# output appears to be Keras's notice about that argument.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# 20% of the training rows are held back by Keras for validation.
# NOTE(review): the training log recorded with this notebook shows the loss
# staying around 1.7e5 and decreasing only marginally over the first epochs --
# presumably because the target is unscaled. Consider scaling y or training
# longer; left unchanged here so the saved model keeps predicting raw prices.
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1
)

# Keras returns predictions with shape (n_samples, 1); flatten to 1-D so the
# array has the same shape as the other models' predictions.
y_pred_ann = model.predict(X_test).ravel()
ann_mse = mean_squared_error(y_test, y_pred_ann)
ann_r2 = r2_score(y_test, y_pred_ann)
print("ANN Results:")
print("MSE:", ann_mse)
print("R2 Score:", ann_r2)
# --- Linear regression baseline ----------------------------------------------
# fit() hands back the estimator itself, so construction and training chain.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Score the held-out predictions with the same two metrics used for every model.
lr_mse, lr_r2 = (
    mean_squared_error(y_test, y_pred_lr),
    r2_score(y_test, y_pred_lr),
)

print("\nLinear Regression Results:")
print("MSE:", lr_mse)
print("R2 Score:", lr_r2)
# --- Random forest baseline --------------------------------------------------
# 200 trees with a fixed seed so repeated runs build the same forest.
rf = RandomForestRegressor(random_state=42, n_estimators=200).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Score the held-out predictions.
rf_mse, rf_r2 = (
    mean_squared_error(y_test, y_pred_rf),
    r2_score(y_test, y_pred_rf),
)

print("\nRandom Forest Results:")
print("MSE:", rf_mse)
print("R2 Score:", rf_r2)
# --- Side-by-side comparison -------------------------------------------------
# One row per model: (name, test MSE, test R2), in the same order as before.
results = pd.DataFrame(
    [
        ("Linear Regression", lr_mse, lr_r2),
        ("Random Forest", rf_mse, rf_r2),
        ("ANN", ann_mse, ann_r2),
    ],
    columns=["Model", "MSE", "R2 Score"],
)
print("\n\n=== MODEL PERFORMANCE COMPARISON ===")
print(results)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - loss: 175491.5625 - val_loss: 172695.3281
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 174249.2031 - val_loss: 172546.4844
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 174419.2656 - val_loss: 172397.1250
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 174908.7812 - val_loss: 172237.9219
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 174296.7500 - val_loss: 172062.8906
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 173387.8750 - val_loss: 171867.7812
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 173243.2656 - val_loss: 171650.0938
Epoch 8/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 173089.4219 - val_loss: 171406.

In [None]:
import zipfile
import joblib
# --- Export results and trained models ---------------------------------------
# Write every artefact to disk first, so the archive step below only has to
# bundle files that already exist.
results.to_csv("regression_model_results.csv", index=False)
joblib.dump(lr, "linear_regression_model.pkl")
joblib.dump(rf, "random_forest_model.pkl")
model.save("ann_model.h5")

# zipfile stores members uncompressed by default (ZIP_STORED); ask for
# ZIP_DEFLATED so the archive is actually compressed. Member order is the
# same as before: CSV, linear regression, random forest, ANN.
with zipfile.ZipFile(
    "regression_outputs.zip", "w", compression=zipfile.ZIP_DEFLATED
) as zipf:
    for artifact in (
        "regression_model_results.csv",
        "linear_regression_model.pkl",
        "random_forest_model.pkl",
        "ann_model.h5",
    ):
        zipf.write(artifact)

# Hand the finished archive to the browser for download.
from google.colab import files
files.download("regression_outputs.zip")




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>