In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from datetime import datetime
from sklearn.svm import SVR
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.metrics import r2_score


In [7]:

def predict_stocks(stock_list):
    results = pd.DataFrame()
    
    for stock in stock_list:
        # Get yfinance data
        yf_data = yf.download(stock, start="2018-01-01", end=datetime.now())
        df = pd.DataFrame(yf_data)
        
        # Split data into training and testing sets
        X = df[['Open', 'High', 'Low', 'Volume']]
        y = df['Close']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Train model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Make predictions on testing data
        y_pred = model.predict(X_test)

        # Calculate mean squared error
        mse = mean_squared_error(y_test, y_pred)

        # Create new DataFrame with actual and predicted closing price
        linear_stock_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Stock': stock})

        # Add results to overall results DataFrame
        results = pd.concat([results, linear_stock_results])
        results.to_csv("C:/Stock_Project/linear.csv")
        # Calculate the accuracy score
        #accuracy = accuracy_score(y_test, y_pred)
        #print(f"{stock}: Accuracy: {accuracy}")

        # Print mean squared error
        print(f"{stock}: Mean Squared Error: {mse}")

    return results


In [8]:

def predict_stocks_svm_linear(stock_list):
    results = pd.DataFrame()
    
    for stock in stock_list:
        # Get yfinance data
        yf_data = yf.download(stock, start="2018-01-01", end=datetime.now())
        df = pd.DataFrame(yf_data)

        # Split data into training and testing sets
        X = df[['Open', 'High', 'Low', 'Volume']]
        y = df['Close']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2022)

        # Train model
        model = SVR(kernel='linear')
        model.fit(X_train, y_train)

        # Make predictions on testing data
        y_pred = model.predict(X_test)

        # Calculate mean squared error
        mse = mean_squared_error(y_test, y_pred)

        # Create new DataFrame with actual and predicted closing price
        svm_linear_stock_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Stock': stock})

        # Add results to overall results DataFrame
        results = pd.concat([results, svm_linear_stock_results])
        results.to_csv("C:/Users/R/Desktop/Stock_data/svm_linear.csv")
         # Calculate the accuracy score
        #accuracy = accuracy_score(y_test, y_pred)
        #print(f"{stock}: Accuracy: {accuracy}")
        accuracy=r2_score(y_test, y_pred)
        print(f"{stock}: Accuracy: {accuracy}")
        # Print mean squared error
        print(f"{stock}: Mean Squared Error: {mse}")

    return results

In [9]:
def predict_stocks_svm_poly(stock_list, degree=3):
    results = pd.DataFrame()
    
    for stock in stock_list:
        # Get yfinance data
        yf_data = yf.download(stock, start="2018-01-01", end=datetime.now())
        df = pd.DataFrame(yf_data)

        # Split data into training and testing sets
        X = df[['Open', 'High', 'Low', 'Volume']]
        y = df['Close']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train model
        model = SVR(kernel='poly', degree=degree)
        model.fit(X_train, y_train)

        # Make predictions on testing data
        y_pred = model.predict(X_test)

        # Calculate mean squared error
        mse = mean_squared_error(y_test, y_pred)

        # Create new DataFrame with actual and predicted closing price
        svm_poly_stock_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Stock': stock})

        # Add results to overall results DataFrame
        results = pd.concat([results, svm_poly_stock_results])
        results.to_csv("C:/Users/R/Desktop/Stock_data/svm_poly.csv")
         # Calculate the accuracy score
        #accuracy = accuracy_score(y_test, y_pred)
        #print(f"{stock}: Accuracy: {accuracy}")
        accuracy=r2_score(y_test, y_pred)
        print(f"{stock}: Accuracy: {accuracy}")

        # Print mean squared error
        print(f"{stock}: Mean Squared Error: {mse}")

    return results

In [10]:

def predict_stocks_random_forest(stock_list, n_estimators=100, max_depth=None):
    results = pd.DataFrame()
    
    for stock in stock_list:
        # Get yfinance data
        yf_data = yf.download(stock, start="2017-01-01", end="2022-01-01")
        df = pd.DataFrame(yf_data)

        # Split data into training and testing sets
        X = df[['Open', 'High', 'Low', 'Volume']]
        y = df['Close']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train model
        model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions on testing data
        y_pred = model.predict(X_test)

        # Calculate mean squared error
        mse = mean_squared_error(y_test, y_pred)

        # Create new DataFrame with actual and predicted closing price
        random_forest_stock_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Stock': stock})

        # Add results to overall results DataFrame
        results = pd.concat([results, random_forest_stock_results])
        results.to_csv("C:/Users/R/Desktop/Stock_data/random_forest.csv")
         # Calculate the accuracy score
        #accuracy = accuracy_score(y_test, y_pred)
        #print(f"{stock}: Accuracy: {accuracy}")
        accuracy=r2_score(y_test, y_pred)
        print(f"{stock}: Accuracy: {accuracy}")

        # Print mean squared error
        print(f"{stock}: Mean Squared Error: {mse}")

    return results

In [11]:
def predict_stocks_decision_tree(stock_list, max_depth=None):
    results = pd.DataFrame()
    
    for stock in stock_list:
        # Get yfinance data
        yf_data = yf.download(stock, start="2018-01-01", end=datetime.now())
        df = pd.DataFrame(yf_data)

        # Split data into training and testing sets
        X = df[['Open', 'High', 'Low', 'Volume']]
        y = df['Close']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train model
        model = DecisionTreeRegressor(max_depth=max_depth, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions on testing data
        y_pred = model.predict(X_test)

        # Calculate mean squared error
        mse = mean_squared_error(y_test, y_pred)

        # Create new DataFrame with actual and predicted closing price
        DT_stock_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Stock': stock})

        # Add results to overall results DataFrame
        results = pd.concat([results, DT_stock_results])
        results.to_csv("C:/Users/R/Desktop/Stock_data/DT.csv")
         # Calculate the accuracy score
        #accuracy = accuracy_score(y_test, y_pred)
        #print(f"{stock}: Accuracy: {accuracy}")
        accuracy=r2_score(y_test, y_pred)
        print(f"{stock}: Accuracy: {accuracy}")

        # Print mean squared error
        print(f"{stock}: Mean Squared Error: {mse}")

    return results

In [12]:
def predict_stocks_lstm(stock_list, look_back=60, epochs=50, batch_size=32):
    results = pd.DataFrame()
    
    for stock in stock_list:
        # Get yfinance data
        yf_data = yf.download(stock,start="2018-01-01", end=datetime.now())
        df = pd.DataFrame(yf_data)

        # Scale data
        scaler = MinMaxScaler()
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(df[['Close']])

        # Create training and testing data
        train_data = scaled_data[:int(len(df)*0.8)]
        test_data = scaled_data[int(len(df)*0.8)-look_back:]

        X_train = []
        y_train = []

        for i in range(look_back, len(train_data)):
            X_train.append(train_data[i-look_back:i,0])
            y_train.append(train_data[i,0])

        X_train, y_train = np.array(X_train), np.array(y_train)
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

        # Build LSTM model
        model = Sequential()

        model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
        model.add(LSTM(50, return_sequences=False))
        model.add(Dense(25))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mean_squared_error')

        # Train model
        early_stop = EarlyStopping(monitor='val_loss', patience=3)
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1, callbacks=[early_stop])

        # Create testing data
        X_test = []
        y_test = df[int(len(df)*0.8):]['Close'].values

        for i in range(look_back, len(test_data)):
            X_test.append(test_data[i-look_back:i,0])

        X_test = np.array(X_test)
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

        # Make predictions on testing data
        predictions = model.predict(X_test)
        predictions = scaler.inverse_transform(predictions)

        # Create new DataFrame with actual and predicted closing price
        lstm_stock_results = pd.DataFrame({'Actual': y_test, 'Predicted': predictions.ravel(), 'Stock': stock})

        # Add results to overall results DataFrame
        results = pd.concat([results, lstm_stock_results])
        results.to_csv("C:/Users/R/Desktop/Stock_data/LSTM.csv")
        #accuracy = 1 - (predictions['Close'] - predictions['Predictions']).abs().mean() / predictions['Close'].mean()
        #print(f"{stock}: Accuracy: {accuracy}")

    return results

In [13]:
tickers = ['SUNPHARMA.NS', 'TATAMOTORS.NS', 'TCS.NS', 'TECHM.NS', 'BHEL.NS', 'RELIANCE.NS', 'INFY.NS']


In [14]:
predict_stocks(tickers)


[*********************100%***********************]  1 of 1 completed
SUNPHARMA.NS: Mean Squared Error: 20.629771733080066
[*********************100%***********************]  1 of 1 completed
TATAMOTORS.NS: Mean Squared Error: 6.146294775835197
[*********************100%***********************]  1 of 1 completed
TCS.NS: Mean Squared Error: 248.3484021203611
[*********************100%***********************]  1 of 1 completed
TECHM.NS: Mean Squared Error: 56.736529172308856
[*********************100%***********************]  1 of 1 completed
BHEL.NS: Mean Squared Error: 0.3485068023050291
[*********************100%***********************]  1 of 1 completed
RELIANCE.NS: Mean Squared Error: 130.70395580307027
[*********************100%***********************]  1 of 1 completed
INFY.NS: Mean Squared Error: 36.22565079707665


Unnamed: 0_level_0,Actual,Predicted,Stock
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-12,1032.849976,1025.385429,SUNPHARMA.NS
2019-05-10,437.750000,438.793273,SUNPHARMA.NS
2022-01-27,812.099976,805.507829,SUNPHARMA.NS
2019-09-05,431.799988,428.449577,SUNPHARMA.NS
2022-11-17,1013.349976,1018.292754,SUNPHARMA.NS
...,...,...,...
2021-03-04,1330.349976,1349.714349,INFY.NS
2019-04-24,736.450012,732.365900,INFY.NS
2019-09-12,816.400024,814.929162,INFY.NS
2022-09-29,1398.650024,1399.668860,INFY.NS


In [53]:
predict_stocks_decision_tree(tickers, max_depth=None)


[*********************100%***********************]  1 of 1 completed
SUNPHARMA.NS: Mean Squared Error: 68.08621574708185
[*********************100%***********************]  1 of 1 completed
TATAMOTORS.NS: Mean Squared Error: 18.81590879102195
[*********************100%***********************]  1 of 1 completed
TCS.NS: Mean Squared Error: 624.3955977717378
[*********************100%***********************]  1 of 1 completed
TECHM.NS: Mean Squared Error: 148.54255285221316
[*********************100%***********************]  1 of 1 completed
BHEL.NS: Mean Squared Error: 1.0852820671818533
[*********************100%***********************]  1 of 1 completed
RELIANCE.NS: Mean Squared Error: 393.02270388400274
[*********************100%***********************]  1 of 1 completed
INFY.NS: Mean Squared Error: 130.39292288484626


Unnamed: 0_level_0,Actual,Predicted,Stock
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-12,1032.849976,1017.750000,SUNPHARMA.NS
2019-05-10,437.750000,437.549988,SUNPHARMA.NS
2022-01-27,812.099976,801.349976,SUNPHARMA.NS
2019-09-05,431.799988,423.799988,SUNPHARMA.NS
2022-11-17,1013.349976,1013.549988,SUNPHARMA.NS
...,...,...,...
2020-07-06,764.000000,771.450012,INFY.NS
2021-03-24,1353.750000,1354.349976,INFY.NS
2019-05-14,713.849976,713.000000,INFY.NS
2022-01-05,1844.650024,1872.400024,INFY.NS


In [None]:
predict_stocks_random_forest(tickers, n_estimators=100, max_depth=None)


In [55]:
predict_stocks_svm_poly(tickers, degree=3)


[*********************100%***********************]  1 of 1 completed
SUNPHARMA.NS: Mean Squared Error: 42579.94371618326
[*********************100%***********************]  1 of 1 completed
TATAMOTORS.NS: Mean Squared Error: 17044.218644335633
[*********************100%***********************]  1 of 1 completed
TCS.NS: Mean Squared Error: 1620068.701789731
[*********************100%***********************]  1 of 1 completed
TECHM.NS: Mean Squared Error: 86487.05368531804
[*********************100%***********************]  1 of 1 completed
BHEL.NS: Mean Squared Error: 357.37313773117734
[*********************100%***********************]  1 of 1 completed
RELIANCE.NS: Mean Squared Error: 388155.0402122559
[*********************100%***********************]  1 of 1 completed
INFY.NS: Mean Squared Error: 196080.01108519247


Unnamed: 0_level_0,Actual,Predicted,Stock
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-12,1032.849976,576.001736,SUNPHARMA.NS
2019-05-10,437.750000,575.944431,SUNPHARMA.NS
2022-01-27,812.099976,575.938111,SUNPHARMA.NS
2019-09-05,431.799988,574.978072,SUNPHARMA.NS
2022-11-17,1013.349976,576.000203,SUNPHARMA.NS
...,...,...,...
2020-07-06,764.000000,950.928665,INFY.NS
2021-03-24,1353.750000,951.119032,INFY.NS
2019-05-14,713.849976,950.727917,INFY.NS
2022-01-05,1844.650024,951.072187,INFY.NS


In [56]:
predict_stocks_lstm(tickers, look_back=60, epochs=50, batch_size=32)

[*********************100%***********************]  1 of 1 completed
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
[*********************100%***********************]  1 of 1 completed
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
[*********************100%***********************]  1 of 1 completed
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
[*********************100%***********************]  1 of 1 completed
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
[*******

Unnamed: 0,Actual,Predicted,Stock
0,843.900024,865.343933,SUNPHARMA.NS
1,820.900024,859.255493,SUNPHARMA.NS
2,820.900024,852.960327,SUNPHARMA.NS
3,829.650024,847.036377,SUNPHARMA.NS
4,822.650024,842.493774,SUNPHARMA.NS
...,...,...,...
252,1479.300049,1592.340332,INFY.NS
253,1507.449951,1583.905884,INFY.NS
254,1492.699951,1576.557495,INFY.NS
255,1480.400024,1569.719482,INFY.NS
