# CROP

In [None]:
import os
import pandas as pd

# Input and output directories
input_directory = "data"  # Replace with your actual input directory
output_directory = "cropped"  # Replace with your actual output directory

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Target date for filtering
filter_date = pd.to_datetime('2019-04-30')

# Iterate over each file in the input directory
for filename in os.listdir(input_directory):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_directory, filename)
        
        try:
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Convert the "Date" column to datetime
            df['Date'] = pd.to_datetime(df['Date'])
            
            # Filter the DataFrame for dates on or after '2019-04-30'
            filtered_df = df[df['Date'] >= filter_date].loc[:, ["Date", "Open", "High", "Low", "Close"]]
            
            # Create a new file name for the output
            output_file_path = os.path.join(output_directory, f"{filename}")
            
            # Save the filtered DataFrame to a new CSV file
            filtered_df.to_csv(output_file_path, index=False)
            print(f"Processed and saved: {output_file_path}")
        
        except Exception as e:
            # Log any errors encountered during processing
            print(f"Error processing {filename}: {e}")


# Crop 1.5 Year


In [2]:
import os
import pandas as pd

# Input and output directories
input_directory = "data"  # Replace with your actual input directory
output_directory = "cropped_2"  # Replace with your actual output directory

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Target date range for filtering
filter_date1 = pd.to_datetime('2019-04-30')
filter_date2 = pd.to_datetime('2020-10-30')

# Iterate over each file in the input directory
for filename in os.listdir(input_directory):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_directory, filename)
        
        try:
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Convert the "Date" column to datetime
            df['Date'] = pd.to_datetime(df['Date'])
            
            # Filter the DataFrame for dates between '2019-04-30' and '2020-10-30'
            filtered_df = df[(df['Date'] >= filter_date1) & (df['Date'] <= filter_date2)].loc[:, ["Date", "Open", "High", "Low", "Close"]]
            
            # Create a new file name for the output
            output_file_path = os.path.join(output_directory, filename)
            
            # Save the filtered DataFrame to a new CSV file
            filtered_df.to_csv(output_file_path, index=False)
            print(f"Processed and saved: {output_file_path}")
        
        except Exception as e:
            # Log any errors encountered during processing
            print(f"Error processing {filename}: {e}")


Processed and saved: cropped_2\ADANIPORTS.csv
Processed and saved: cropped_2\ASIANPAINT.csv
Processed and saved: cropped_2\AXISBANK.csv
Processed and saved: cropped_2\BAJAJ-AUTO.csv
Processed and saved: cropped_2\BAJAJFINSV.csv
Processed and saved: cropped_2\BAJFINANCE.csv
Processed and saved: cropped_2\BHARTIARTL.csv
Processed and saved: cropped_2\BPCL.csv
Processed and saved: cropped_2\BRITANNIA.csv
Processed and saved: cropped_2\CIPLA.csv
Processed and saved: cropped_2\COALINDIA.csv
Processed and saved: cropped_2\DRREDDY.csv
Processed and saved: cropped_2\EICHERMOT.csv
Processed and saved: cropped_2\GAIL.csv
Processed and saved: cropped_2\GRASIM.csv
Processed and saved: cropped_2\HCLTECH.csv
Processed and saved: cropped_2\HDFC.csv
Processed and saved: cropped_2\HDFCBANK.csv
Processed and saved: cropped_2\HEROMOTOCO.csv
Processed and saved: cropped_2\HINDALCO.csv
Processed and saved: cropped_2\HINDUNILVR.csv
Processed and saved: cropped_2\ICICIBANK.csv
Processed and saved: cropped_2\

# Find Returns

In [3]:
import os
import pandas as pd

# Directories
filtered_directory = "cropped_2"  # Replace with your actual filtered files directory
output_directory = "return_2"      # Replace with your actual output directory

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Function to calculate "Return" and create a new CSV file
def calculate_return(df):
    # Sort the DataFrame by Date to ensure proper order
    df = df.sort_values(by='Date').reset_index(drop=True)

    # Calculate the "Return" as (current Close - previous Close) / previous Close
    df['Return'] = df['Close'].pct_change()

    # Keep only the "Date" and "Return" columns
    return_df = df[['Date', 'Return']].dropna()

    return return_df

# Iterate over each filtered CSV file
for filename in os.listdir(filtered_directory):
    if filename.endswith(".csv"):
        file_path = os.path.join(filtered_directory, filename)
        
        try:
            # Read the filtered CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Calculate the return and create a new DataFrame
            return_df = calculate_return(df)
            
            # Create a new file name for the output
            output_file_path = os.path.join(output_directory, f"{filename}")
            
            # Save the new DataFrame to a CSV file
            return_df.to_csv(output_file_path, index=False)
            print(f"Processed and saved: {output_file_path}")
        
        except Exception as e:
            # Log any errors encountered during processing
            print(f"Error processing {filename}: {e}")


Processed and saved: return_2\ADANIPORTS.csv
Processed and saved: return_2\ASIANPAINT.csv
Processed and saved: return_2\AXISBANK.csv
Processed and saved: return_2\BAJAJ-AUTO.csv
Processed and saved: return_2\BAJAJFINSV.csv
Processed and saved: return_2\BAJFINANCE.csv
Processed and saved: return_2\BHARTIARTL.csv
Processed and saved: return_2\BPCL.csv
Processed and saved: return_2\BRITANNIA.csv
Processed and saved: return_2\CIPLA.csv
Processed and saved: return_2\COALINDIA.csv
Processed and saved: return_2\DRREDDY.csv
Processed and saved: return_2\EICHERMOT.csv
Processed and saved: return_2\GAIL.csv
Processed and saved: return_2\GRASIM.csv
Processed and saved: return_2\HCLTECH.csv
Processed and saved: return_2\HDFC.csv
Processed and saved: return_2\HDFCBANK.csv
Processed and saved: return_2\HEROMOTOCO.csv
Processed and saved: return_2\HINDALCO.csv
Processed and saved: return_2\HINDUNILVR.csv
Processed and saved: return_2\ICICIBANK.csv
Processed and saved: return_2\INDUSINDBK.csv
Processe

In [4]:
import os

# Directory containing the files
directory = "return_2"  # Replace with the actual directory path

# List to store file names without extensions
file_names = []

# Iterate over each file in the directory
for filename in os.listdir(directory):
    # Check if it's a file (not a directory)
    if os.path.isfile(os.path.join(directory, filename)):
        # Split the file name and extension, and store the file name without extension
        name, _ = os.path.splitext(filename)
        file_names.append(name)

# Print the list of file names without extensions
for name in file_names:
    print(name)

# file_names


ADANIPORTS
ASIANPAINT
AXISBANK
BAJAJ-AUTO
BAJAJFINSV
BAJFINANCE
BHARTIARTL
BPCL
BRITANNIA
CIPLA
COALINDIA
DRREDDY
EICHERMOT
GAIL
GRASIM
HCLTECH
HDFC
HDFCBANK
HEROMOTOCO
HINDALCO
HINDUNILVR
ICICIBANK
INDUSINDBK
INFY
IOC
ITC
JSWSTEEL
KOTAKBANK
LT
MARUTI
MM
NESTLEIND
NIFTY50_all
NTPC
ONGC
POWERGRID
RELIANCE
SBIN
SHREECEM
SUNPHARMA
TATAMOTORS
TATASTEEL
TCS
TECHM
TITAN
ULTRACEMCO
UPL
VEDL
WIPRO
ZEEL


# Check shape of all return data

In [5]:
import os
import pandas as pd

# Directory containing the CSV files
directory = "return_2"  # Replace with the actual directory path

# Dictionary to store the shape of each DataFrame
file_shapes = {}

# Iterate over each file in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Construct full file path
        file_path = os.path.join(directory, filename)
        
        try:
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Get the file name without extension
            file_name_without_ext = os.path.splitext(filename)[0]
            
            # Store the shape (rows, columns) of the DataFrame
            file_shapes[file_name_without_ext] = df.shape
        
        except Exception as e:
            print(f"Error processing {filename}: {e}")

# Print the dictionary of file shapes
print(file_shapes)


{'ADANIPORTS': (374, 2), 'ASIANPAINT': (374, 2), 'AXISBANK': (374, 2), 'BAJAJ-AUTO': (374, 2), 'BAJAJFINSV': (374, 2), 'BAJFINANCE': (374, 2), 'BHARTIARTL': (374, 2), 'BPCL': (374, 2), 'BRITANNIA': (374, 2), 'CIPLA': (374, 2), 'COALINDIA': (374, 2), 'DRREDDY': (374, 2), 'EICHERMOT': (374, 2), 'GAIL': (374, 2), 'GRASIM': (374, 2), 'HCLTECH': (374, 2), 'HDFC': (374, 2), 'HDFCBANK': (374, 2), 'HEROMOTOCO': (374, 2), 'HINDALCO': (374, 2), 'HINDUNILVR': (374, 2), 'ICICIBANK': (374, 2), 'INDUSINDBK': (374, 2), 'INFY': (374, 2), 'IOC': (374, 2), 'ITC': (374, 2), 'JSWSTEEL': (374, 2), 'KOTAKBANK': (374, 2), 'LT': (374, 2), 'MARUTI': (374, 2), 'MM': (374, 2), 'NESTLEIND': (374, 2), 'NIFTY50_all': (18374, 2), 'NTPC': (374, 2), 'ONGC': (374, 2), 'POWERGRID': (374, 2), 'RELIANCE': (374, 2), 'SBIN': (374, 2), 'SHREECEM': (374, 2), 'SUNPHARMA': (374, 2), 'TATAMOTORS': (374, 2), 'TATASTEEL': (374, 2), 'TCS': (374, 2), 'TECHM': (374, 2), 'TITAN': (374, 2), 'ULTRACEMCO': (374, 2), 'UPL': (374, 2), 'VED

# Make the combined REturn Data

In [10]:
import pandas as pd
import os

# Set the directory containing the CSV files
input_directory = 'selected_2'  # Replace with your actual directory path

# List all CSV files in the directory
files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

# Initialize a list to store the "Return" columns
return_columns = []

# Loop through each file and extract the "Return" column
for file in files:
    file_path = os.path.join(input_directory, file)
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        
        # Check if the "Return" column exists
        if 'Return' in df.columns:
            # Append the "Return" column to the list
            return_columns.append(df[['Return']])
        else:
            print(f"Warning: 'Return' column not found in {file}")
    except Exception as e:
        print(f"Error reading {file}: {e}")

# Concatenate the "Return" columns horizontally
combined_df = pd.concat(return_columns, axis=1)

# Print the shape of the combined DataFrame
print(f"Shape of combined DataFrame: {combined_df.shape}")

# Save the combined DataFrame to a new CSV file (optional)
output_file = 'selected_2/combined_returns.csv'
combined_df.to_csv(output_file, index=False)


Shape of combined DataFrame: (374, 10)


# Now Goto the main.R

## After getting weights paste them here

In [14]:
weights = [8.948917e-02, 1.150844e-01, 1.003946e-01, 1.750287e-01, 3.027476e-02, 7.418727e-03, 
           1.449852e-18, 2.056144e-01, 2.766953e-01, 0.0]

# Now `numbers` is a Python list containing the values.
money_to_invest= 1_000_000

In [15]:
money_to_invest

1000000

In [20]:
money_to_all=[]
for i in weights:
    money_to_all.append(money_to_invest*i)

In [21]:
money_to_all

[89489.17000000001,
 115084.40000000001,
 100394.6,
 175028.7,
 30274.760000000002,
 7418.727,
 1.449852e-12,
 205614.4,
 276695.3,
 0.0]

In [22]:
import os

# Directory containing the files
directory = "selected_2"  # Replace with the actual directory path

# List to store file names without extensions
file_names = []

# Iterate over each file in the directory
for filename in os.listdir(directory):
    # Check if it's a file (not a directory)
    if os.path.isfile(os.path.join(directory, filename)):
        # Split the file name and extension, and store the file name without extension
        name, _ = os.path.splitext(filename)
        file_names.append(name)

# Print the list of file names without extensions
# for name in file_names:
#     print(name)

file_names


['BRITANNIA',
 'CIPLA',
 'COALINDIA',
 'combined_returns',
 'DRREDDY',
 'HDFCBANK',
 'HEROMOTOCO',
 'ICICIBANK',
 'NESTLEIND',
 'POWERGRID',
 'TATAMOTORS']

In [25]:
import os
import pandas as pd

# Folder containing your CSV files
input_directory = "data"  # Replace with your actual input directory

# List of CSV files to select
selected_files = [
    'BRITANNIA', 'CIPLA', 'COALINDIA', 'DRREDDY', 
    'HDFCBANK', 'HEROMOTOCO', 'ICICIBANK', 'NESTLEIND', 'POWERGRID', 'TATAMOTORS'
]

# Dates to select
date1 = pd.to_datetime("2020-10-30")
date2 = pd.to_datetime("2021-04-30")

# Lists to store "Close" values
close_values_date1 = []
close_values_date2 = []

# Iterate over each selected file
for file_name in selected_files:
    file_path = os.path.join(input_directory, f"{file_name}.csv")
    
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        
        # Convert the "Date" column to datetime
        df['Date'] = pd.to_datetime(df['Date'])
        
        # Get "Close" values for the specific dates
        close_value_date1 = df.loc[df['Date'] == date1, 'Close'].values
        close_value_date2 = df.loc[df['Date'] == date2, 'Close'].values
        
        # Check if the date exists in the DataFrame and add to lists
        if close_value_date1.size > 0:
            close_values_date1.append(close_value_date1[0])
        else:
            close_values_date1.append(None)  # Append None if the date is not found
        
        if close_value_date2.size > 0:
            close_values_date2.append(close_value_date2[0])
        else:
            close_values_date2.append(None)  # Append None if the date is not found
    
    except Exception as e:
        print(f"Error processing {file_name}: {e}")

# Output the lists
print("Close values on 2020-05-03:", close_values_date1)
print("Close values on 2021-04-30:", close_values_date2)


Close values on 2020-05-03: [3473.25, 754.5, 114.2, 4888.65, 1183.55, 2799.8, 392.6, 17161.6, 171.0, 132.65]
Close values on 2021-04-30: [3449.0, 910.35, 133.05, 5163.1, 1412.3, 2819.15, 600.5, 16309.25, 220.05, 293.85]


In [30]:
# Element-wise integer division
result = [a // b for a, b in zip(money_to_all,close_values_date1)]

print(result)

[25.0, 152.0, 879.0, 35.0, 25.0, 2.0, 0.0, 11.0, 1618.0, 0.0]


In [31]:
# Element-wise integer division
result2 = [a * b for a, b in zip(result,close_values_date2)]

print(result2)

[86225.0, 138373.2, 116950.95000000001, 180708.5, 35307.5, 5638.3, 0.0, 179401.75, 356040.9, 0.0]


In [32]:
sum(result2)

1098646.1

In [34]:
sum(result2) - money_to_invest

98646.1000000001