In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.model_selection import train_test_split,GridSearchCV,cross_validate
from sklearn.preprocessing import StandardScaler,PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
import joblib
def linreg(x,y): #model for linear regression
    """Fit a degree-2 polynomial linear regression and score it on a hold-out split.

    Parameters
    ----------
    x : feature table passed to ``train_test_split`` and ``Pipeline.fit``.
    y : target values aligned row-for-row with ``x``.

    Returns
    -------
    tuple
        ``(mse, pipeline)`` where ``mse`` is the mean squared error on the
        20% test split and ``pipeline`` is the pipeline fitted on the
        remaining 80%.
    """
    # Fixed seed so the same rows land in train/test on every call.
    features_train, features_test, target_train, target_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )

    # Expand to quadratic terms, standardise them, then fit ordinary least squares.
    steps = [
        ('poly', PolynomialFeatures(degree=2, interaction_only=False)),
        ('scaler', StandardScaler()),
        ('lin_reg', LinearRegression()),
    ]
    model = Pipeline(steps)
    model.fit(features_train, target_train)

    test_mse = mean_squared_error(target_test, model.predict(features_test))
    return test_mse, model

def ridgereg(x,y): #ridge regressor
    """Tune and fit a degree-2 polynomial Ridge regression, scored on a hold-out split.

    The ``ridge__alpha`` value is chosen by 5-fold cross-validated grid search
    (negative mean squared error) on the 80% training split.

    Parameters
    ----------
    x : feature table passed to ``train_test_split`` and ``GridSearchCV.fit``.
    y : target values aligned row-for-row with ``x``.

    Returns
    -------
    tuple
        ``(mse, best_pipeline)`` where ``mse`` is the mean squared error of the
        selected pipeline on the 20% test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )  # train test split

    pipe = Pipeline([
        ('poly', PolynomialFeatures(degree=2, interaction_only=False)),
        ('scaler', StandardScaler()),
        ('ridge', Ridge()),
    ])
    hyperparam = {'ridge__alpha': [1, 3, 5, 10, 15, 20, 30]}
    gs = GridSearchCV(
        estimator=pipe,
        param_grid=hyperparam,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    gs.fit(x_train, y_train)

    # GridSearchCV's default refit=True has already refitted best_estimator_
    # on the whole training split, so fitting it again would only repeat work.
    best = gs.best_estimator_

    e2 = mean_squared_error(y_test, best.predict(x_test))  # error
    return e2, best

def lasreg(x,y): #lasso regressor
    """Tune and fit a degree-2 polynomial Lasso regression, scored on a hold-out split.

    The ``lasso__alpha`` value is chosen by 5-fold cross-validated grid search
    (negative mean squared error) on the 80% training split.

    Parameters
    ----------
    x : feature table passed to ``train_test_split`` and ``GridSearchCV.fit``.
    y : target values aligned row-for-row with ``x``.

    Returns
    -------
    tuple
        ``(mse, best_pipeline)`` where ``mse`` is the mean squared error of the
        selected pipeline on the 20% test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )  # train test split

    pipe = Pipeline([
        ('poly', PolynomialFeatures(degree=2, interaction_only=False)),
        ('scaler', StandardScaler()),
        ('lasso', Lasso()),
    ])
    hyperparam = {'lasso__alpha': [1, 3, 5, 10, 15, 20, 30]}
    gs = GridSearchCV(
        estimator=pipe,
        param_grid=hyperparam,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    gs.fit(x_train, y_train)

    # GridSearchCV's default refit=True has already refitted best_estimator_
    # on the whole training split, so fitting it again would only repeat work.
    best = gs.best_estimator_

    e3 = mean_squared_error(y_test, best.predict(x_test))  # error
    return e3, best

def forest(x,y):# random forest regressor
    """Tune and fit a random forest regressor, scored on a hold-out split.

    ``forest__n_estimators`` is chosen by 5-fold cross-validated grid search on
    the 80% training split. The search is scored with negative mean squared
    error so that model selection uses the same metric as the error returned
    here and as the Ridge/Lasso searches in this notebook; without an explicit
    ``scoring`` the search would fall back to the estimator's default score
    (R^2 for regressors).

    Parameters
    ----------
    x : feature table passed to ``train_test_split`` and ``GridSearchCV.fit``.
    y : target values aligned row-for-row with ``x``.

    Returns
    -------
    tuple
        ``(mse, best_pipeline)`` where ``mse`` is the mean squared error of the
        selected pipeline on the 20% test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )  # train test split

    # Seeded so repeated runs build the same trees; the local is not called
    # `forest` to avoid shadowing this function's own name.
    regressor = RandomForestRegressor(random_state=42)
    pipe = Pipeline([
        ('poly', PolynomialFeatures(degree=2, interaction_only=False)),
        ('scaler', StandardScaler()),
        ('forest', regressor),
    ])

    # NOTE(review): the grid previously read [1000, 25000, 4000, 5000]; 25000
    # breaks the ascending pattern and looks like a typo for 2500 -- confirm.
    hyperparam = {'forest__n_estimators': [1000, 2500, 4000, 5000]}
    gs = GridSearchCV(
        estimator=pipe,
        param_grid=hyperparam,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    gs.fit(x_train, y_train)

    # GridSearchCV's default refit=True has already refitted best_estimator_
    # on the whole training split; a second fit would rebuild every tree.
    best = gs.best_estimator_

    e4 = mean_squared_error(y_test, best.predict(x_test))  # error
    return e4, best
        

def predictor(ticker):
    """Predict a closing price for ``ticker`` from user-entered market values.

    Downloads daily data for 2022-01-01..2024-01-01 with yfinance, builds
    features from the previous row's open/high/low/close/volume plus the
    current row's open, trains the four models defined in this notebook, and
    averages the prediction of the best linear-family model (lowest hold-out
    MSE among linear, Ridge and Lasso) with the random forest's prediction.

    Side effects: prompts on stdin for six numbers and writes the two chosen
    models to ``m1.joblib`` and ``m2.joblib`` in the working directory.

    Parameters
    ----------
    ticker : ticker symbol passed straight to ``yf.download``.

    Returns
    -------
    The mean of the two model predictions, rounded to 2 decimals.

    Raises
    ------
    ValueError
        If no data is returned for ``ticker`` or an entered value is not a number.
    """
    feature_cols = ['Open_prev', 'High_prev', 'Low_prev', 'Close_prev',
                    'Volume_prev', 'Open_current']

    #formatting the data
    df = yf.download(ticker, "2022-01-01", "2024-01-01")
    if df is None or df.empty:
        raise ValueError(f"No price data returned for ticker {ticker!r}")
    # NOTE(review): recent yfinance releases appear to return (field, ticker)
    # MultiIndex columns even for one ticker -- confirm against the installed
    # version. Keeping only the field level makes df['Close'] a Series either way.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df = df.sort_index()
    for field in ('Open', 'High', 'Low', 'Close', 'Volume'):
        df[field + '_prev'] = df[field].shift(1)
    df['Open_current'] = df['Open']
    df = df.dropna()  # the first row has no previous-day values

    #dividing data into targets and features
    x = df[feature_cols]
    y = df['Close']

    #taking input from users
    op=float(input("enter previous open"))
    h=float(input("enter previous high"))
    l=float(input("Enter previous low"))
    c=float(input("Enter previous close"))
    v=float(input("Enter previous volume"))
    oc=float(input("Enter current Open"))

    #model and errors
    e1,p1=linreg(x,y)
    e2,p2=ridgereg(x,y)
    e3,p3=lasreg(x,y)
    _,p4=forest(x,y)  # the forest is always used, so its error is not compared

    #taking models with least error
    # min() returns the first minimum, so ties resolve to the earlier model
    # instead of leaving no model selected (as a chain of strict `<` tests would).
    linear_errors = [e1, e2, e3]
    linear_models = [p1, p2, p3]
    best_idx = min(range(len(linear_errors)), key=linear_errors.__getitem__)
    m1 = linear_models[best_idx]
    m2 = p4

    # Persist the chosen models; prediction below uses the in-memory objects,
    # so there is no need to read the files back.
    joblib.dump(m1,'m1.joblib')
    joblib.dump(m2,'m2.joblib')

    # The models were trained on all six features in this order, so the
    # prediction row must supply the same six columns.
    input_data = pd.DataFrame([[op, h, l, c, v, oc]], columns=feature_cols)
    pred1=m1.predict(input_data)
    pred2=m2.predict(input_data)
    final_pred=round((pred1[0]+pred2[0])/2,2)
    return final_pred
    
    

    
    
        

        

    

In [None]:
# Interactive menu: keep prompting until the user chooses to quit.
while True:
    try:
        ch=int(input("Enter 1 to perform prediction, 2 to quit"))
    except ValueError:
        # Non-numeric menu input should re-prompt, not end the session.
        print("Invalid choice: please enter 1 or 2")
        continue

    if ch==1:
        ticker=input("Enter ticker of the security whose price you want to predict").strip()
        try:
            p=predictor(ticker)
        except ValueError as err:
            # Bad numeric input or unusable data for this ticker: report it
            # and return to the menu.
            print("Prediction failed: ",err)
            continue
        print("Prediction: ",p)
    elif ch==2:
        print("Thank you")
        break
    else:
        print("Invalid choice: please enter 1 or 2")
