<a href="https://colab.research.google.com/github/abhisekde96/fii-pro-analysis/blob/main/CP_positions_D30.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:


import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import pytz
import os
from multiprocessing import Pool
import time

# Wall-clock start of the cell; subtracted from the end time to report total run time
start_time = time.time()

# Function to load or cache the CSV
def load_or_cache_csv(date):
    """Return the participant OI table for ``date``, using a local CSV cache.

    The cache lives in ``./cache`` relative to the current working directory.
    A cached file is preferred; if it is missing or cannot be parsed, the file
    is downloaded and the cache is (re)written.

    Parameters
    ----------
    date : str
        Date string interpolated verbatim into the cache file name and the
        download URL (callers in this notebook pass ``DDMMYYYY``).

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when the download fails (the failure is
        printed, not raised, so one bad date does not abort a batch).
    """
    cache_dir = './cache'
    local_path = f'{cache_dir}/fao_participant_oi_{date}.csv'

    if os.path.exists(local_path):
        print(f"Loading from cache: {date}")
        try:
            return pd.read_csv(local_path)
        except Exception as e:
            # An empty or corrupt cache file must not take the whole run down;
            # fall through and fetch a fresh copy instead.
            print(f"Cache unreadable for date: {date} - {str(e)}; downloading again")

    try:
        # header=1: column names are taken from the second line of the remote file
        df = pd.read_csv(
            f'http://archives.nseindia.com/content/nsccl/fao_participant_oi_{date}.csv',
            header=1,
        )
    except Exception as e:
        print(f"Failed to load data for date: {date} - {str(e)}")
        return None

    # Caching is best-effort: a write failure should not discard a good download.
    try:
        os.makedirs(cache_dir, exist_ok=True)  # Create cache directory if it doesn't exist
        # Write to a per-process temp file and rename it into place so that an
        # interrupted run never leaves a truncated file under the final name.
        tmp_path = f'{local_path}.{os.getpid()}.tmp'
        df.to_csv(tmp_path, index=False)
        os.replace(tmp_path, local_path)
    except OSError as e:
        print(f"Could not cache data for date: {date} - {str(e)}")
    return df

# Current wall-clock time in the Asia/Kolkata (IST) timezone
ist_timezone = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist_timezone)

# From 21:00 IST onwards the current calendar day is used as the reference
# date; before that the reference date is the previous calendar day.
days_back = 0 if current_time.hour >= 21 else 1
today = current_time.date() - timedelta(days=days_back)

# Collect the most recent weekdays (Monday-Friday), newest first, as DDMMYYYY strings.
# NOTE(review): 33 weekdays are requested although this step was described as
# "last 30 weekdays" - presumably the extra days are a buffer for dates whose
# file cannot be loaded; confirm the intended count.
WEEKDAYS_TO_FETCH = 33

dates_list = []
cursor = today  # walk backwards on a separate name so `today` keeps the reference date
while len(dates_list) < WEEKDAYS_TO_FETCH:
    if cursor.weekday() < 5:  # Monday is 0, Sunday is 6
        dates_list.append(cursor.strftime('%d%m%Y'))
    cursor -= timedelta(days=1)

# Kept for compatibility with code that reads the counter
date_count = len(dates_list)

# Fetch every date's CSV through a worker pool; `results` follows the order of `dates_list`.
# NOTE(review): Pool() starts worker processes - confirm this works on every
# platform the notebook is run on (process start methods differ between OSes).
with Pool() as pool:
    results = pool.map(load_or_cache_csv, dates_list)

# Pair each date with its result and drop the dates that failed to load (None)
loaded = [(day, frame) for day, frame in zip(dates_list, results) if frame is not None]

# date -> DataFrame for the successful loads
dataframes = dict(loaded)

# Successful dates in load order, plus a reversed one-column frame of them
successful_dates = [day for day, _ in loaded]
dates = pd.DataFrame(successful_dates, columns=['Date']).iloc[::-1]

# The loaded DataFrames as a plain list, in the same order as `successful_dates`
dataframes_list = list(dataframes.values())

# Reduce each day's table to two columns: net call (c{i}) and net put (p{i}) positions.
# The per-day frames are kept in `df_list` rather than in dynamically created
# globals (t0, t1, ...), which were only ever read back into this same list.
if not dataframes_list:
    raise ValueError("No participant OI files could be loaded; nothing to analyse")

df_list = []
for i, raw_df in enumerate(dataframes_list):
    # Rows and columns are picked by position: rows 2-3 and columns 0, 5-8.
    # NOTE(review): presumably rows 2-3 are the two participant categories of
    # interest - confirm against the file layout.
    subset = raw_df.iloc[2:4, [0, 5, 6, 7, 8]].set_index('Client Type')

    # Build a fresh frame instead of adding columns to a slice of the raw table
    net_positions = pd.DataFrame({
        f'c{i}': subset['Option Index Call Long'] - subset['Option Index Call Short'],  # net calls
        f'p{i}': subset['Option Index Put Long'] - subset['Option Index Put Short'],    # net puts
    })
    df_list.append(net_positions)

# Concatenate all DataFrames along columns (axis=1)
t_all = pd.concat(df_list, axis=1)

# Column labels of the per-day net call / net put series
day_numbers = range(len(dataframes_list))
c_columns = [f'c{i}' for i in day_numbers]
p_columns = [f'p{i}' for i in day_numbers]

# Split the combined table into its call columns and its put columns
calls = t_all[c_columns]
puts = t_all[p_columns]

# Sum each day's column over the selected rows, relabel the result by day
# number (c3 -> 3, p3 -> 3) and keep it as a one-column DataFrame (column 0)
tc = calls.sum().rename(dict(zip(c_columns, day_numbers))).to_frame()
tp = puts.sum().rename(dict(zip(p_columns, day_numbers))).to_frame()

# dd/mm/YYYY labels for every loaded date, carrying the same integer index as tc/tp
date_labels = pd.to_datetime(dates['Date'], format='%d%m%Y').dt.strftime('%d/%m/%Y')

# Reverse the row order, attach the date label (aligned on the integer index),
# make it the index and give the value column a descriptive name
tc = (
    tc.iloc[::-1]
    .assign(Date=date_labels)
    .set_index('Date')
    .rename(columns={0: 'call_positions'})
)
tp = (
    tp.iloc[::-1]
    .assign(Date=date_labels)
    .set_index('Date')
    .rename(columns={0: 'put_positions'})
)

# Final DataFrame: calls and puts side by side, rows reversed again, and
# 'Date' parsed back into datetimes
all_data = (
    pd.merge(tc, tp, left_index=True, right_index=True, how='inner')
    .reset_index()
    .iloc[::-1]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%d/%m/%Y'))
)

# Calculate Call/Put Changes and Direction
all_data = all_data.copy()  # work on an owned copy before adding columns
all_data['Day'] = all_data['Date'].dt.day_name()
all_data['direction'] = (
    (all_data['call_positions'] > all_data['put_positions'])
    .map({True: 'CALLS>>', False: 'PUTS>>'})
)

# Rows are ordered newest first, so shift(-1) pulls up the previous row's
# (i.e. the preceding loaded day's) value next to each day
all_data['c1'] = all_data['call_positions'].shift(-1)
all_data['p1'] = all_data['put_positions'].shift(-1)


def _describe_change(current, previous):
    """Describe the change from ``previous`` to ``current`` for each row.

    Returns a pair of string Series: the absolute percentage change
    (e.g. ``'12.63%'``) and a label combining the action with that percentage
    (e.g. ``'ADDED 12.63%'``). Rows with no previous value get ``'N/A'`` in
    both instead of a misleading ``'DUMPED nan%'``, and an exactly zero change
    is labelled ``UNCHANGED`` rather than ``DUMPED``.
    A previous value of 0 makes the percentage infinite; that case is not
    handled specially.
    """
    delta = current - previous
    has_previous = delta.notna()
    percent_text = (delta / previous * 100).abs().round(2).astype(str) + '%'
    action = delta.map(
        lambda x: 'ADDED' if x > 0 else ('DUMPED' if x < 0 else 'UNCHANGED')
    )
    label = action + ' ' + percent_text
    return percent_text.where(has_previous, 'N/A'), label.where(has_previous, 'N/A')


call_pct, call_label = _describe_change(all_data['call_positions'], all_data['c1'])
put_pct, put_label = _describe_change(all_data['put_positions'], all_data['p1'])
all_data['cd'] = call_pct
all_data['pd'] = put_pct
all_data['CALLS'] = call_label
all_data['PUTS'] = put_label

# Presentation table; copied so later edits to it do not touch all_data
final_df = all_data[['Date', 'Day', 'call_positions', 'put_positions', 'CALLS', 'PUTS']].copy()

end_time = time.time()

total_time = end_time - start_time
print(f"Total time taken: {total_time:.2f} seconds")

Loading from cache: 04102024
Loading from cache: 27092024Loading from cache: 03102024

Loading from cache: 26092024
Loading from cache: 25092024
Loading from cache: 24092024
Loading from cache: 23092024
Loading from cache: 20092024
Loading from cache: 19092024
Loading from cache: 18092024
Loading from cache: 17092024
Loading from cache: 16092024
Loading from cache: 13092024
Loading from cache: 12092024
Loading from cache: 11092024
Loading from cache: 10092024
Loading from cache: 09092024
Loading from cache: 06092024
Loading from cache: 05092024
Loading from cache: 04092024
Loading from cache: 03092024
Loading from cache: 02092024
Loading from cache: 30082024
Loading from cache: 29082024
Loading from cache: 28082024
Loading from cache: 27082024
Loading from cache: 26082024
Loading from cache: 23082024
Loading from cache: 22082024
Loading from cache: 21082024
Failed to load data for date: 02102024 - HTTP Error 404: Not Found
Loading from cache: 01102024
Loading from cache: 30092024
Total

In [86]:
# Display the summary table (rows are ordered newest date first)
final_df

Unnamed: 0,Date,Day,call_positions,put_positions,CALLS,PUTS
31,2024-10-04,Friday,458066,1061905,ADDED 224.98%,ADDED 56.55%
30,2024-10-03,Thursday,140954,678329,DUMPED 68.01%,ADDED 12.63%
29,2024-10-01,Tuesday,440563,602264,ADDED 448.3%,ADDED 20.42%
28,2024-09-30,Monday,80350,500145,DUMPED 87.42%,ADDED 1.18%
27,2024-09-27,Friday,638634,494317,DUMPED 2.41%,ADDED 207.9%
26,2024-09-26,Thursday,654379,160546,DUMPED 21.5%,DUMPED 46.35%
25,2024-09-25,Wednesday,833552,299270,ADDED 57.68%,ADDED 588.93%
24,2024-09-24,Tuesday,528620,43440,DUMPED 34.66%,ADDED 821.47%
23,2024-09-23,Monday,808992,-6021,DUMPED 21.5%,DUMPED 102.66%
22,2024-09-20,Friday,1030622,226214,ADDED 109.53%,DUMPED 38.19%
