In [None]:
!pip install yfinance scikit-learn pandas numpy matplotlib

In [2]:
import yfinance as yf
import pandas as pd

# Fetch historical data for Nifty 50 index (NSE)
nifty_data = yf.download("^NSEI", start="2020-01-01", end="2024-08-16")

# Display the first few rows
print(nifty_data.head())


[*********************100%%**********************]  1 of 1 completed

                    Open          High           Low         Close  \
Date                                                                 
2020-01-01  12202.150391  12222.200195  12165.299805  12182.500000   
2020-01-02  12198.549805  12289.900391  12195.250000  12282.200195   
2020-01-03  12261.099609  12265.599609  12191.349609  12226.650391   
2020-01-06  12170.599609  12179.099609  11974.200195  11993.049805   
2020-01-07  12079.099609  12152.150391  12005.349609  12052.950195   

               Adj Close  Volume  
Date                              
2020-01-01  12182.500000  304100  
2020-01-02  12282.200195  407700  
2020-01-03  12226.650391  428800  
2020-01-06  11993.049805  396500  
2020-01-07  12052.950195  447800  





In [6]:
# Create a simple moving average (SMA) and daily return
nifty_data['SMA_20'] = nifty_data['Close'].rolling(window=20).mean()
nifty_data['Daily_Return'] = nifty_data['Close'].pct_change()

print(nifty_data['SMA_20'])
print(nifty_data['Daily_Return'])

# Drop rows with NaN values
nifty_data.dropna(inplace=True)

# Define the target variable (1 for price increase, 0 for decrease)
nifty_data['Target'] = (nifty_data['Close'].shift(-1) > nifty_data['Close']).astype(int)
print(nifty_data['Target'])
# Select features and target
features = ['SMA_20', 'Daily_Return']
X = nifty_data[features]
y = nifty_data['Target']

X , y

Date
2020-01-28             NaN
2020-01-29             NaN
2020-01-30             NaN
2020-01-31             NaN
2020-02-03             NaN
                  ...     
2024-08-08    24541.404980
2024-08-09    24543.982520
2024-08-12    24536.225000
2024-08-13    24513.840039
2024-08-14    24490.377539
Name: SMA_20, Length: 1124, dtype: float64
Date
2020-01-28         NaN
2020-01-29    0.006113
2020-01-30   -0.007725
2020-01-31   -0.006123
2020-02-03   -0.021250
                ...   
2024-08-08   -0.007429
2024-08-09    0.010387
2024-08-12   -0.000841
2024-08-13   -0.008543
2024-08-14    0.000197
Name: Daily_Return, Length: 1124, dtype: float64
Date
2020-03-02    1
2020-03-04    0
2020-03-11    0
2020-03-12    1
2020-03-13    0
             ..
2024-07-30    1
2024-07-31    1
2024-08-01    0
2024-08-06    1
2024-08-08    0
Name: Target, Length: 333, dtype: int32


(                  SMA_20  Daily_Return
 Date                                  
 2020-03-02  11908.017627     -0.006160
 2020-03-04  11851.355078     -0.004627
 2020-03-11  11591.922607      0.000665
 2020-03-12  11466.035107     -0.083019
 2020-03-13  11353.735107      0.038065
 ...                  ...           ...
 2024-07-30  24490.012402      0.000854
 2024-07-31  24531.377441      0.003776
 2024-08-01  24567.597461      0.002395
 2024-08-06  24558.562402     -0.002621
 2024-08-08  24541.404980     -0.007429
 
 [333 rows x 2 columns],
 Date
 2020-03-02    1
 2020-03-04    0
 2020-03-11    0
 2020-03-12    1
 2020-03-13    0
              ..
 2024-07-30    1
 2024-07-31    1
 2024-08-01    0
 2024-08-06    1
 2024-08-08    0
 Name: Target, Length: 333, dtype: int32)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.53


In [5]:
import numpy as np

# Add predictions to the original dataset with correct alignment
nifty_data['Prediction'] = np.nan

# Align the predictions with the correct indices
predictions_df = pd.DataFrame(data={'Prediction': y_pred}, index=X_test.index)
nifty_data.update(predictions_df)

# Simulate a simple backtest strategy
initial_balance = 10000
balance = initial_balance
shares = 0

for i in range(len(nifty_data) - 1):
    if nifty_data['Prediction'].iloc[i] == 1:
        # Buy the index
        shares = balance / nifty_data['Close'].iloc[i]
        balance = 0
    elif shares > 0:
        # Sell the index
        balance = shares * nifty_data['Close'].iloc[i]
        shares = 0

# Final balance
final_balance = balance + (shares * nifty_data['Close'].iloc[-1])
print(f"Final Balance: {final_balance:.2f}, Profit: {final_balance - initial_balance:.2f}")


Final Balance: 0.00, Profit: -10000.00
