In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Use only the Close column for prediction
X = np.array(df['Close'].values[:-1]).reshape(-1, 1)
y = np.array(df['Close'].values[1:])

# Split the data into training and testing sets
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict the stock prices for the test data
y_pred = lr.predict(X_test)
print(y_pred)

# Print the mean squared error of the predictions
print('Mean Squared Error:', np.mean((y_pred - y_test)**2))

[3708.1769112  3705.8306847  3731.99020163 3740.37714016 3759.14817527
 3770.73037098 3815.11172291 3755.7535203  3684.6632067  3686.16089177
 3696.89430142 3691.60267655 3662.49751662 3528.80426072 3472.74076956
 3557.50989119 3629.24885918 3613.17385289 3548.92306562 3547.02609571
 3561.90300288 3584.96745077 3547.42538055 3543.13196776 3480.5787526
 3514.17672311 3433.50161648 3445.98232538 3439.64202741 3412.93331036
 3409.58838252 3415.82873697 3377.78768302 3452.47229401 3449.67656629
 3263.11483179 3294.11696162 3322.82259209 3289.12467806 3168.96031491
 3228.16884686 3262.46593277 3376.24002619 3365.35694588 3356.22092019
 3423.91633419 3441.03976895 3431.85401613 3363.70934553 3405.09532732
 3428.01010456 3360.91361781 3220.8303368  3211.79425463 3207.55081361
 3143.3499981  3090.33184884 3113.79558157 3213.19199618 3224.17502002
 3309.8926266  3294.21690514 3315.78342335 3319.22805008 3291.62081984
 3268.25703063 3316.18295281 3236.25644408 3217.53538073 3261.91673266
 3288.9

In [8]:
import tkinter as tk
from tkinter import messagebox
from tkinter import ttk
import pandas as pd
import mplfinance as mpf

# Load the future predictions data
future_data = pd.read_csv('predictions.csv')

# Define the Tkinter application window
window = tk.Tk()
window.title("Candlestick Graph")
window.geometry("800x600")

# Define the date selection frame
frame_date = ttk.Frame(window)
frame_date.pack(pady=20)

# Define the start date label and entry
label_start_date = ttk.Label(frame_date, text="Start Date:")
label_start_date.grid(row=0, column=0, padx=10, pady=5)
entry_start_date = ttk.Entry(frame_date)
entry_start_date.grid(row=0, column=1, padx=10, pady=5)

# Define the end date label and entry
label_end_date = ttk.Label(frame_date, text="End Date:")
label_end_date.grid(row=1, column=0, padx=10, pady=5)
entry_end_date = ttk.Entry(frame_date)
entry_end_date.grid(row=1, column=1, padx=10, pady=5)

# Define the predict button
def predict_and_plot():
    start_date = entry_start_date.get()
    end_date = entry_end_date.get()

    # Check if both dates are provided
    if start_date == '' or end_date == '':
        messagebox.showerror("Error", "Please enter both start and end dates.")
        return

    # Convert dates to datetime objects
    try:
        start_date = pd.to_datetime(start_date)
        end_date = pd.to_datetime(end_date)
    except ValueError:
        messagebox.showerror("Error", "Invalid date format. Please use the format: YYYY-MM-DD.")
        return

    # Filter the future data based on the selected dates
    mask = (future_data['Date'] >= start_date) & (future_data['Date'] <= end_date)
    filtered_data = future_data.loc[mask]

    # Check if there is any data for the selected dates
    if filtered_data.empty:
        messagebox.showerror("Error", "No data available for the selected dates.")
        return

    # Plot the candlestick graph
    filtered_data['Date'] = pd.to_datetime(filtered_data['Date'])
    filtered_data.set_index('Date', inplace=True)
    mpf.plot(filtered_data, type='candle', title='Candlestick Graph', ylabel='Price', figscale=1.5)

# Define the predict button
button_predict = ttk.Button(window, text="Predict & Plot", command=predict_and_plot)
button_predict.pack(pady=10)

# Run the Tkinter event loop
window.mainloop()


ModuleNotFoundError: No module named 'mplfinance'

In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Use only the Close column for prediction
X = np.array(df['Close'].values[:-1]).reshape(-1, 1)
y = np.array(df['Close'].values[1:])

# Fit the linear regression model on the data
lr = LinearRegression()
lr.fit(X, y)

# Predict the closing value of the stock for the next day
last_close = df['Close'].iloc[-1]
next_close = lr.predict(np.array(last_close).reshape(-1, 1))[0]

# Print the predicted closing value of the stock for the next day
print('The predicted closing value of the stock for the next day is:', next_close)

The predicted closing value of the stock for the next day is: 3120.523216050814


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Use only the Close column for prediction
X = df[['Close']].iloc[:-1]
y = df[['Close']].iloc[1:]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict the closing value of the stock for the next day
last_close = df['Close'].iloc[-1]
next_close = lr.predict([[last_close]])[0][0]

# Print the predicted closing value of the stock for the next day
print('The predicted closing value of the stock for the next day is:', next_close)


The predicted closing value of the stock for the next day is: 3121.353863706954


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Extract the input (X) and output (y) variables for prediction
X = df[['Close']].iloc[:-1]  # input variable is the 'Close' column of OHLC data excluding the last row
y = df[['Close']].iloc[1:]   # output variable is the 'Close' column of OHLC data excluding the first row

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict the closing value of the stock for the next day
last_close = df['Close'].iloc[-1]  # use the last row of the OHLC data to get the most recent closing value
next_close = lr.predict([[last_close]])[0][0]  # use the linear regression model to predict the next closing value

# Print the predicted closing value of the stock for the next day
print('The predicted closing value of the stock for the next day is:', next_close)


The predicted closing value of the stock for the next day is: 3121.353863706954


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from datetime import datetime, timedelta

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Extract the input (X) and output (y) variables for prediction
X = df[['Close']].iloc[:-1]  # input variable is the 'Close' column of OHLC data excluding the last row
y = df[['Close']].iloc[1:]   # output variable is the 'Close' column of OHLC data excluding the first row

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Prompt the user for the number of days to predict
n_days = int(input('Enter the number of days to predict: '))

# Predict the closing value of the stock for the next n days
last_close = df['Close'].iloc[-1]  # use the last row of the OHLC data to get the most recent closing value
for i in range(n_days):
    next_close = lr.predict([[last_close]])[0][0]  # use the linear regression model to predict the next closing value
    next_date = datetime.strptime(df['Date'].iloc[-1], '%Y-%m-%d') + timedelta(days=1)  # compute the next date
    print('The predicted closing value of the stock for', next_date.date(), 'is:', next_close)
    last_close = next_close  # update the last close value for the next iteration


The predicted closing value of the stock for 2023-03-25 is: 3121.353863706954
The predicted closing value of the stock for 2023-03-25 is: 3122.20549059822
The predicted closing value of the stock for 2023-03-25 is: 3123.0548865334495
The predicted closing value of the stock for 2023-03-25 is: 3123.902057356945
The predicted closing value of the stock for 2023-03-25 is: 3124.747008897699
The predicted closing value of the stock for 2023-03-25 is: 3125.5897469694332
The predicted closing value of the stock for 2023-03-25 is: 3126.4302773706404
The predicted closing value of the stock for 2023-03-25 is: 3127.268605884623
The predicted closing value of the stock for 2023-03-25 is: 3128.1047382795327
The predicted closing value of the stock for 2023-03-25 is: 3128.938680308412


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from datetime import datetime, timedelta

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Extract the input (X) and output (y) variables for prediction
X = df[['Close']].iloc[:-1]  # input variable is the 'Close' column of OHLC data excluding the last row
y = df[['Close']].iloc[1:]   # output variable is the 'Close' column of OHLC data excluding the first row

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Prompt the user for the number of days to predict
n_days = int(input('Enter the number of days to predict: '))

# Predict the closing value of the stock for the next n days
last_close = df['Close'].iloc[-1]  # use the last row of the OHLC data to get the most recent closing value
last_date = datetime.strptime(df['Date'].iloc[-1], '%Y-%m-%d')  # use the date of the last row of the OHLC data as the starting date
for i in range(n_days):
    next_close = lr.predict([[last_close]])[0][0]  # use the linear regression model to predict the next closing value
    next_date = last_date + timedelta(days=i+1)  # compute the next date
    print('The predicted closing value of the stock for', next_date.date(), 'is:', next_close)
    last_close = next_close  # update the last close value for the next iteration


The predicted closing value of the stock for 2023-03-25 is: 3121.353863706954
The predicted closing value of the stock for 2023-03-26 is: 3122.20549059822
The predicted closing value of the stock for 2023-03-27 is: 3123.0548865334495
The predicted closing value of the stock for 2023-03-28 is: 3123.902057356945
The predicted closing value of the stock for 2023-03-29 is: 3124.747008897699
The predicted closing value of the stock for 2023-03-30 is: 3125.5897469694332
The predicted closing value of the stock for 2023-03-31 is: 3126.4302773706404
The predicted closing value of the stock for 2023-04-01 is: 3127.268605884623
The predicted closing value of the stock for 2023-04-02 is: 3128.1047382795327
The predicted closing value of the stock for 2023-04-03 is: 3128.938680308412


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from datetime import datetime, timedelta

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Extract the input (X) and output (y) variables for prediction
X = df[['Close']].drop(df.index[-1])  # input variable is the 'Close' column of OHLC data excluding the last row
y = df[['Close']].drop(df.index[0])   # output variable is the 'Close' column of OHLC data excluding the first row

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Prompt the user for the number of days to predict
n_days = int(input('Enter the number of days to predict: '))

# Predict the closing value of the stock for the next n days
last_close = df['Close'].iloc[-1]  # use the last row of the OHLC data to get the most recent closing value
last_date = datetime.strptime(df['Date'].tail(1).values[0], '%Y-%m-%d')  # use the date of the last row of the OHLC data as the starting date
for i in range(n_days):
    next_close = lr.predict([[last_close]])[0][0]  # use the linear regression model to predict the next closing value
    next_date = last_date + timedelta(days=i+1)  # compute the next date
    print('The predicted closing value of the stock for', next_date.date(), 'is:', next_close)
    last_close = next_close  # update the last close value for the next iteration


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from datetime import datetime, timedelta

# Load the OHLC data
df = pd.read_csv('TCS.NS.csv')

# Extract the input (X) and output (y) variables for prediction
X = df[['Close']].drop(df.index[-1])  # input variable is the 'Close' column of OHLC data excluding the last row
y = df[['Close']].drop(df.index[0])   # output variable is the 'Close' column of OHLC data excluding the first row

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear regression model on the training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Prompt the user for the number of days to predict
n_days = int(input('Enter the number of days to predict: '))

# Predict the closing value of the stock for the next n days
last_close = df['Close'].iloc[-1]  # use the last row of the OHLC data to get the most recent closing value
last_date = datetime.strptime(df['Date'].tail(1).values[0], '%Y-%m-%d')  # use the date of the last row of the OHLC data as the starting date
for i in range(n_days):
    next_close = lr.predict([[last_close]])[0][0]  # use the linear regression model to predict the next closing value
    next_date = last_date + timedelta(days=i+1)  # compute the next date
    print('The predicted closing value of the stock for', next_date.date(), 'is:', next_close)
    last_close = next_close  # update the last close value for the next iteration


The predicted closing value of the stock for 2023-03-25 is: 3121.353863706954
The predicted closing value of the stock for 2023-03-26 is: 3122.20549059822
The predicted closing value of the stock for 2023-03-27 is: 3123.0548865334495
The predicted closing value of the stock for 2023-03-28 is: 3123.902057356945
The predicted closing value of the stock for 2023-03-29 is: 3124.747008897699
The predicted closing value of the stock for 2023-03-30 is: 3125.5897469694332
The predicted closing value of the stock for 2023-03-31 is: 3126.4302773706404
The predicted closing value of the stock for 2023-04-01 is: 3127.268605884623
The predicted closing value of the stock for 2023-04-02 is: 3128.1047382795327
The predicted closing value of the stock for 2023-04-03 is: 3128.938680308412


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from datetime import datetime, timedelta

# Load the OHLC data
df = pd.read_csv('5yr.csv')

# Extract the input (X) and output (y) variables for prediction
X = df[['Close']].iloc[:-1]  # input variable is the 'Close' column of OHLC data excluding the last row
y = df[['Close']].iloc[1:]   # output variable is the 'Close' column of OHLC data excluding the first row

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the random forest regressor model on the training data
rf = RandomForestRegressor()
rf.fit(X_train, y_train)

# Predict the closing value of the stock for the next 10000 days
last_close = df['Close'].iloc[-1]  # use the last row of the OHLC data to get the most recent closing value
predictions = []
for i in range(10000):
    next_close = rf.predict([[last_close]])[0]  # use the random forest regressor model to predict the next closing value
    next_date = datetime.strptime(df['Date'].iloc[-1], '%Y-%m-%d') + timedelta(days=1)  # compute the next date
    predictions.append(next_close)
    last_close = next_close  # update the last close value for the next iteration

# Store the predictions in a new CSV file
output_df = pd.DataFrame({'Date': pd.date_range(start=df['Date'].iloc[-1], periods=10000 + 1, freq='D')[1:],
                          'Predicted_Close': predictions})
output_df.to_csv('predictions.csv', index=False)

# Print the predictions for the first 10000 days
# for i in range(10000):
#     print('The predicted closing value of the stock for', output_df['Date'].iloc[i].date(), 'is:', predictions[i])

  rf.fit(X_train, y_train)
