In [None]:
# Setup: install dependencies and set the machine's time zone.
# Every command below is an IPython `!` shell escape, so this cell only runs inside a notebook kernel.

# Install packages from PyPI (no versions are pinned, so results may drift between runs)
!pip install smartapi-python
!pip install logzero
!pip install pyotp
!pip install websocket-client
!pip install ijson
!pip install mplfinance
!pip install pandas plotly

# Install TA-Lib from prebuilt conda-forge artifacts:
# first unpack the native libta-lib 0.4.0 archive's `lib` directory into the system library path ...
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
# ... then download the ta-lib 0.5.1 Python package (a py311 build, per its file name), extract it with
# `cph` (installed by conda-package-handling) and move `talib` into the Python 3.11 dist-packages directory.
!pip install conda-package-handling
!wget https://anaconda.org/conda-forge/ta-lib/0.5.1/download/linux-64/ta-lib-0.5.1-py311h9ecbd09_0.conda
!cph x ta-lib-0.5.1-py311h9ecbd09_0.conda
!mv ./ta-lib-0.5.1-py311h9ecbd09_0/lib/python3.11/site-packages/talib /usr/local/lib/python3.11/dist-packages/

# Set time zone to IST: replace /etc/localtime with a symlink to the Asia/Kolkata zone file,
# then print the current date so the change can be checked in the cell output.
!rm /etc/localtime
!ln -s /usr/share/zoneinfo/Asia/Kolkata /etc/localtime
!date


In [2]:

# package import statement
from SmartApi import SmartConnect #or from SmartApi.smartConnect import SmartConnect
import pyotp
from logzero import logger
import datetime
import zipfile
import os
from datetime import time, timedelta, datetime
import ijson
import glob
import json
import mplfinance as mpf
import time
import plotly.graph_objects as go
import requests
import re
import pandas as pd
from talib.abstract import *

In [None]:
import pandas as pd
import re
from datetime import datetime

def data_cleaning(file_name):
    """Load one day's CSV and keep only the nearest-expiry NIFTY option rows.

    The trading date is parsed from the trailing ``_DDMMYYYY`` part of the
    source name, and each contract's expiry from the first ``DDMMMYY`` group
    found in its ticker (e.g. ``02JAN20``).

    Args:
        file_name: Either an open file-like object exposing a ``name``
            attribute (such as the object returned by ``ZipFile.open``) or a
            ``str``/path-like pointing at the CSV. The name must end in
            ``_DDMMYYYY.<ext>``.

    Returns:
        pandas.DataFrame holding the Ticker, Time, Open, High, Low and Close
        columns for tickers that start with ``NIFTY`` and end with
        ``CE.NFO``/``PE.NFO`` and whose expiry is the earliest one on or
        after the trading date. ``Time`` contains ``datetime.time`` objects.
        The frame is empty when no such expiry exists.
    """
    # File objects carry the path in `.name`; plain paths are used as-is.
    source_name = str(getattr(file_name, "name", file_name))
    date_str = source_name.split("_")[-1].split(".")[0]  # Extract 'DDMMYYYY'
    target_date = pd.to_datetime(date_str, format="%d%m%Y")

    print("\n--------------------------------------")
    print(f"Date: {target_date.strftime('%d %b %Y')}")

    df = pd.read_csv(file_name, usecols=['Ticker', 'Time', 'Open', 'High', 'Low', 'Close'])
    df['Time'] = pd.to_datetime(df['Time'].str.strip(), format="%H:%M:%S").dt.time

    # na=False keeps rows with a missing ticker from poisoning the boolean mask.
    tickers = df['Ticker']
    mask = (
        tickers.str.startswith('NIFTY', na=False)
        & tickers.str.endswith(('PE.NFO', 'CE.NFO'), na=False)
    )
    # Explicit copy: a new column is added below, which must not write into a slice of the raw frame.
    df = df.loc[mask].copy()

    # Pull (day, month abbreviation, two-digit year) out of every ticker at once.
    parts = df['Ticker'].str.extract(r'(\d{2})([A-Z]{3})(\d{2})')
    if parts.isnull().any().any():
        print("Warning: Some tickers did not match the expected pattern.")
    parts = parts.dropna().set_axis(['Day', 'Month', 'Year'], axis=1)

    # Build the numeric year/month/day frame that pd.to_datetime can assemble into dates.
    expiry = pd.DataFrame({
        'Year': parts['Year'].astype(int) + 2000,
        'Month': pd.to_datetime(parts['Month'], format='%b').dt.month,
        'Day': parts['Day'].astype(int),
    })
    # Index alignment leaves NaT on rows whose ticker had no parsable expiry.
    df['Expiry_Date'] = pd.to_datetime(expiry)

    # Earliest expiry that has not passed yet; NaT (and so an empty result) when there is none.
    min_expiry_date = df.loc[df['Expiry_Date'] >= target_date, 'Expiry_Date'].min()
    df = df.loc[df['Expiry_Date'] == min_expiry_date]

    return df.drop(columns=['Expiry_Date'])


def add_time_to_string(time_str, m):
    """Return ``time_str`` ("HH:MM:SS") shifted by ``m`` minutes, in the same format.

    Only the clock part is kept, so a shift that crosses midnight wraps
    around (the date component of the intermediate datetime is discarded).
    """
    clock_format = "%H:%M:%S"
    shifted = datetime.strptime(time_str, clock_format) + timedelta(minutes=m)
    return shifted.strftime(clock_format)

def find_nearest_options(df, price_input, curr_time):
    """Pick, for one minute of the day, the CE and PE rows priced closest to a target.

    Args:
        df: Frame with 'Ticker', 'Time' (objects exposing ``hour``/``minute``)
            and 'Close' columns.
        price_input: Target premium; closeness is ``abs(Close - price_input)``.
        curr_time: "HH:MM:SS" string; only its hour and minute are matched.

    Returns:
        ``(nearest_ce, nearest_pe)``. Each is the matching row as a Series
        (with an extra 'Closest' field holding the absolute difference), or
        ``None`` when no ticker of that type traded in the requested minute.
    """
    wanted = pd.to_datetime(curr_time, format='%H:%M:%S')

    # Seconds are ignored: a candle matches when its hour and minute both agree.
    hour_matches = df['Time'].apply(lambda stamp: stamp.hour) == wanted.hour
    minute_matches = df['Time'].apply(lambda stamp: stamp.minute) == wanted.minute
    candidates = df[hour_matches & minute_matches].copy()
    candidates['Closest'] = (candidates['Close'] - price_input).abs()

    def _nearest(suffix):
        # Row with the smallest distance among tickers ending in `suffix`.
        side = candidates[candidates['Ticker'].str.endswith(suffix)]
        if side.empty:
            return None
        return side.loc[side['Closest'].idxmin()]

    return _nearest('CE.NFO'), _nearest('PE.NFO')

def sl_and_reentry(df, ticker, price, time, stop_loss_price=60, capital=200000, lot_size=75):
    """Follow one sold option from its entry minute until stop-loss or the last candle.

    Only candles of ``ticker`` in a strictly later minute than ``time`` are
    considered. If any of them has ``High >= stop_loss_price`` the position
    is bought back at ``stop_loss_price`` on the earliest such candle;
    otherwise it is bought back at the ``Close`` of the latest candle.

    Args:
        df: Frame with 'Ticker', 'Time' (objects exposing ``hour``/``minute``),
            'High' and 'Close' columns.
        ticker: Ticker of the option that was sold.
        price: Price at which it was sold.
        time: Entry time as an "HH:MM:SS" string.
        stop_loss_price: Price level that triggers the buy-back.
        capital: Capital base the profit percentage is expressed against.
        lot_size: Quantity multiplier applied to the per-unit profit.

    Returns:
        ``(exit_time, profit_p, transactions)`` where ``exit_time`` is the
        'Time' value of the exit candle, ``profit_p`` is a one-element list
        with ``(sell - buy) / capital * 100 * lot_size`` and ``transactions``
        is a one-element list of ``(sell, buy)``. When there is no candle
        after the entry minute nothing can be evaluated, so ``time`` is
        returned unchanged together with two empty lists.
    """
    entry = pd.to_datetime(time, format='%H:%M:%S')
    entry_minute = entry.hour * 60 + entry.minute

    candles = df[df['Ticker'] == ticker]
    # Minute-of-day comparison == "later hour, or same hour and later minute"; seconds are ignored.
    minute_of_day = candles['Time'].apply(lambda stamp: stamp.hour * 60 + stamp.minute)
    later = candles[minute_of_day > entry_minute].sort_values(by='Time', kind='stable')

    if later.empty:
        # Entered on (or after) the final candle: no exit price is observable.
        return time, [], []

    stopped = later[later['High'] >= stop_loss_price]
    if stopped.empty:
        # Stop-loss never touched: square off at the close of the last candle.
        exit_row = later.iloc[-1]
        buy_price = exit_row['Close']
    else:
        # Stop-loss hit: assume the fill happens exactly at the stop level.
        exit_row = stopped.iloc[0]
        buy_price = stop_loss_price

    profit_pct = (-buy_price + price) / capital * 100 * lot_size
    return exit_row['Time'], [profit_pct], [(price, buy_price)]

def _trade_leg(df, leg, first_pick, start_time, price_input, cutoff, max_entries):
    """Run up to ``max_entries`` back-to-back entries for one option side and return their profits."""
    profits = []
    ticker, price = first_pick['Ticker'], first_pick['Close']
    x_time = start_time

    for _ in range(max_entries):
        # Zero-padded "HH:MM:SS" strings order correctly under plain string comparison.
        if x_time > cutoff:
            break

        exit_time, leg_profit, _transactions = sl_and_reentry(df, ticker, price, x_time)
        profits.extend(leg_profit)

        # The exit time may be a time-like object; its last 8 characters are "HH:MM:SS".
        x_time = str(exit_time)[-8:]
        next_pick = find_nearest_options(df, price_input, x_time)[leg]
        if next_pick is None:
            # Nothing of this type traded at the exit minute, so there is nothing to re-enter.
            break
        ticker, price = next_pick['Ticker'], next_pick['Close']

    return profits


def backtest(df, price_input=40, start_time="09:30:00", end_time="15:00:00",
             interval_minutes=30, max_entries=3):
    """Backtest the option-selling schedule on one day's data and print a summary.

    Starting at ``start_time`` and stepping by ``interval_minutes`` while the
    slot is not after ``end_time``, the CE and the PE whose close is nearest
    to ``price_input`` are each sold. Every leg is followed with
    ``sl_and_reentry`` and, after each exit, re-entered on the then-nearest
    option of the same type, for at most ``max_entries`` entries per leg per
    slot.

    Args:
        df: One day's option rows, as consumed by ``find_nearest_options``
            and ``sl_and_reentry``.
        price_input: Target premium used to choose the options to sell.
        start_time: First entry slot, "HH:MM:SS".
        end_time: Last time at which a new entry is allowed, "HH:MM:SS".
        interval_minutes: Gap between entry slots.
        max_entries: Maximum number of entries per leg in each slot.

    Returns:
        Sum of all recorded per-trade profit percentages.
    """
    profit_p = []
    curr_time = start_time

    while curr_time <= end_time:
        picks = find_nearest_options(df, price_input, curr_time)  # (CE row or None, PE row or None)

        for leg, pick in enumerate(picks):
            if pick is not None:
                profit_p.extend(
                    _trade_leg(df, leg, pick, curr_time, price_input, end_time, max_entries)
                )

        next_time = add_time_to_string(curr_time, interval_minutes)
        if next_time <= curr_time:
            # The clock string wrapped past midnight (or did not advance); stop instead of looping forever.
            break
        curr_time = next_time

    total_sum = sum(profit_p)
    num_wins = sum(1 for value in profit_p if value > 0)
    num_losses = sum(1 for value in profit_p if value < 0)

    # Print results
    print(profit_p)
    print("Total Profit %:", total_sum)
    print("Number of wins:", num_wins)
    print("Number of losses:", num_losses)

    return total_sum


def _is_macos_metadata(member_name):
    """Return True for macOS metadata entries: anything under __MACOSX or with a base name starting with '._'."""
    # Zip member names always use '/' as the separator, whatever the host OS.
    return "__MACOSX" in member_name or member_name.rsplit("/", 1)[-1].startswith("._")


def process_nested_zip(zip_file_path):
    """
    Processes a zip file containing monthly zip files, which in turn contain CSV files.

    Every ``.csv`` member of every inner ``.zip`` is cleaned with
    ``data_cleaning`` and run through ``backtest``; the grand total of the
    backtest results is printed at the end. macOS metadata entries are
    skipped at both levels, wherever in the archive they sit.

    Args:
    zip_file_path: The path to the main zip file (e.g., "2020.zip").
    """
    total = 0
    with zipfile.ZipFile(zip_file_path, 'r') as main_zip:
        for month_zip_info in main_zip.infolist():
            month_name = month_zip_info.filename
            # Decide before opening the member, so metadata stubs are never opened at all.
            if not month_name.endswith(".zip") or _is_macos_metadata(month_name):
                continue

            with main_zip.open(month_zip_info) as month_zip_file, \
                    zipfile.ZipFile(month_zip_file, 'r') as month_zip:
                for csv_file_info in month_zip.infolist():
                    csv_name = csv_file_info.filename
                    if not csv_name.endswith(".csv") or _is_macos_metadata(csv_name):
                        continue

                    # data_cleaning reads the whole member, so the handle can be closed before backtesting.
                    with month_zip.open(csv_file_info) as csv_file:
                        df = data_cleaning(csv_file)
                    total += backtest(df)

    print("Total : ", total)

# Run the backtest over every daily CSV inside the nested "2020.zip" archive.
# NOTE(review): the absolute /content path is hardcoded (presumably a Colab upload location); the archive must
# exist there before this cell runs.
process_nested_zip("/content/2020.zip")


