<a href="https://colab.research.google.com/github/anuska14ghosh/Stock_Buy_Sell_Predictor/blob/main/Profit_max.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
data = pd.read_csv('/content/NIFTY50_all.csv')

# Display the first few rows of the data
print(data.head())

# Assuming the dataset has columns: 'Open', 'High', 'Low', 'LTP', 'Volume (lacs)', 'Turnover (crs.)', 'Close'
# Creating a new column 'Change' to indicate price change
data['Change'] = data['Close'].diff()

# Dropping rows with NaN values
data = data.dropna()

# Creating a target variable 'Buy' (1 if price is expected to go up, 0 otherwise)
data['Buy'] = (data['Change'] > 0).astype(int)

# Selecting features
features = ['Open', 'High', 'Low', 'Last', 'Volume', 'Turnover']
X = data[features]
y = data['Buy']

# Splitting the data into training and testing sets (50/50 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Scaling the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


         Date      Symbol Series  Prev Close    Open     High    Low   Last  \
0  2007-11-27  MUNDRAPORT     EQ      440.00  770.00  1050.00  770.0  959.0   
1  2007-11-28  MUNDRAPORT     EQ      962.90  984.00   990.00  874.0  885.0   
2  2007-11-29  MUNDRAPORT     EQ      893.90  909.00   914.75  841.0  887.0   
3  2007-11-30  MUNDRAPORT     EQ      884.20  890.00   958.00  890.0  929.0   
4  2007-12-03  MUNDRAPORT     EQ      921.55  939.75   995.00  922.0  980.0   

    Close    VWAP    Volume      Turnover  Trades  Deliverable Volume  \
0  962.90  984.72  27294366  2.687719e+15     NaN           9859619.0   
1  893.90  941.38   4581338  4.312765e+14     NaN           1453278.0   
2  884.20  888.09   5124121  4.550658e+14     NaN           1069678.0   
3  921.55  929.17   4609762  4.283257e+14     NaN           1260913.0   
4  969.30  965.65   2977470  2.875200e+14     NaN            816123.0   

   %Deliverble  
0       0.3612  
1       0.3172  
2       0.2088  
3       0.2735  
4

### **Model Training**

using ***RandomForestClassifier***

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initializing the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Training the model
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


Accuracy: 0.7803463404905936
              precision    recall  f1-score   support

           0       0.78      0.78      0.78     29765
           1       0.78      0.78      0.78     30407

    accuracy                           0.78     60172
   macro avg       0.78      0.78      0.78     60172
weighted avg       0.78      0.78      0.78     60172



### **Buy/Sell Strategy**
Implementing a basic buy/sell strategy based on model predictions.

In [None]:
# Adding predictions to the test set
X_test_df = pd.DataFrame(X_test, columns=features)
X_test_df['Buy_Pred'] = y_pred

# Resetting the index of y_test to align with X_test_df
y_test = y_test.reset_index(drop=True)

# Extract corresponding 'Close' values using the reset indices
X_test_df['Close'] = data.iloc[y_test.index]['Close'].values

# Implementing the strategy
initial_balance = 100000  # Starting with $100,000
balance = initial_balance
stock_holdings = 0
profit = 0

for i in range(len(X_test_df)):
    if X_test_df.iloc[i]['Buy_Pred'] == 1 and balance > X_test_df.iloc[i]['Close']:
        # Buy stock
        stock_holdings += balance // X_test_df.iloc[i]['Close']
        balance %= X_test_df.iloc[i]['Close']
    elif X_test_df.iloc[i]['Buy_Pred'] == 0 and stock_holdings > 0:
        # Sell stock
        balance += stock_holdings * X_test_df.iloc[i]['Close']
        stock_holdings = 0

# Calculate final balance
final_balance = balance + (stock_holdings * X_test_df.iloc[-1]['Close'])
profit = final_balance - initial_balance

print(f'Initial Balance: ${initial_balance}')
print(f'Final Balance: ${final_balance}')
print(f'Total Profit: ${profit}')


Initial Balance: $100000
Final Balance: $11614971.450000027
Total Profit: $11514971.450000027


In [None]:
# Load new data to predict (assuming same structure as original data)
new_data = pd.read_csv('/content/WIPRO.csv')

# Predict on new data and evaluate profit
predicted_data = predict_and_evaluate_profit(new_data, model, features)

# Save the predictions to a CSV file
predicted_data.to_csv('predicted_data_with_evaluation.csv', index=False)
print("Predictions with evaluation saved to 'predicted_data_with_evaluation.csv'")

print(predicted_data[['Date', 'Close', 'Prediction', 'Recommendation', 'Actual_Next_Day_Movement', 'Correct_Prediction']])

Initial Balance: $100000
Final Balance: $100000.0
Total Profit: $0.0
Predictions with evaluation saved to 'predicted_data_with_evaluation.csv'
            Date   Close  Prediction Recommendation Actual_Next_Day_Movement  \
2850  2011-06-01  448.30           0        Not Buy                  Not Buy   
2851  2011-06-02  442.95           1        Not Buy                      Buy   
2852  2011-06-03  443.10           1        Not Buy                      Buy   
2853  2011-06-06  445.05           0        Not Buy                  Not Buy   
2854  2011-06-07  440.60           1        Not Buy                      Buy   
...          ...     ...         ...            ...                      ...   
5301  2021-04-26  480.30           0        Not Buy                      Buy   
5302  2021-04-27  485.05           0        Not Buy                      Buy   
5303  2021-04-28  489.30           0        Not Buy                      Buy   
5304  2021-04-29  489.85           0        Not Buy      