In [39]:
import os
import pandas as pd

# Input and output directories
input_directory = "data"  # Replace with your actual input directory
output_directory = "cropped"  # Replace with your actual output directory

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Target date for filtering
filter_date = pd.to_datetime('2019-04-30')

# Iterate over each file in the input directory
for filename in os.listdir(input_directory):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_directory, filename)
        
        try:
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Convert the "Date" column to datetime
            df['Date'] = pd.to_datetime(df['Date'])
            
            # Filter the DataFrame for dates on or after '2019-04-30'
            filtered_df = df[df['Date'] >= filter_date].loc[:, ["Date", "Open", "High", "Low", "Close"]]
            
            # Create a new file name for the output
            output_file_path = os.path.join(output_directory, f"{filename}")
            
            # Save the filtered DataFrame to a new CSV file
            filtered_df.to_csv(output_file_path, index=False)
            print(f"Processed and saved: {output_file_path}")
        
        except Exception as e:
            # Log any errors encountered during processing
            print(f"Error processing {filename}: {e}")


Processed and saved: cropped\ADANIPORTS.csv
Processed and saved: cropped\ASIANPAINT.csv
Processed and saved: cropped\AXISBANK.csv
Processed and saved: cropped\BAJAJ-AUTO.csv
Processed and saved: cropped\BAJAJFINSV.csv
Processed and saved: cropped\BAJFINANCE.csv
Processed and saved: cropped\BHARTIARTL.csv
Processed and saved: cropped\BPCL.csv
Processed and saved: cropped\BRITANNIA.csv
Processed and saved: cropped\CIPLA.csv
Processed and saved: cropped\COALINDIA.csv
Processed and saved: cropped\DRREDDY.csv
Processed and saved: cropped\EICHERMOT.csv
Processed and saved: cropped\GAIL.csv
Processed and saved: cropped\GRASIM.csv
Processed and saved: cropped\HCLTECH.csv
Processed and saved: cropped\HDFC.csv
Processed and saved: cropped\HDFCBANK.csv
Processed and saved: cropped\HEROMOTOCO.csv
Processed and saved: cropped\HINDALCO.csv
Processed and saved: cropped\HINDUNILVR.csv
Processed and saved: cropped\ICICIBANK.csv
Processed and saved: cropped\INDUSINDBK.csv
Processed and saved: cropped\IN

In [41]:
import os
import pandas as pd

# Directories
filtered_directory = "cropped"  # Replace with your actual filtered files directory
output_directory = "return"      # Replace with your actual output directory

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Function to calculate "Return" and create a new CSV file
def calculate_return(df):
    # Sort the DataFrame by Date to ensure proper order
    df = df.sort_values(by='Date').reset_index(drop=True)

    # Calculate the "Return" as (current Close - previous Close) / previous Close
    df['Return'] = df['Close'].pct_change()

    # Keep only the "Date" and "Return" columns
    return_df = df[['Date', 'Return']].dropna()

    return return_df

# Iterate over each filtered CSV file
for filename in os.listdir(filtered_directory):
    if filename.endswith(".csv"):
        file_path = os.path.join(filtered_directory, filename)
        
        try:
            # Read the filtered CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Calculate the return and create a new DataFrame
            return_df = calculate_return(df)
            
            # Create a new file name for the output
            output_file_path = os.path.join(output_directory, f"{filename}")
            
            # Save the new DataFrame to a CSV file
            return_df.to_csv(output_file_path, index=False)
            print(f"Processed and saved: {output_file_path}")
        
        except Exception as e:
            # Log any errors encountered during processing
            print(f"Error processing {filename}: {e}")


Processed and saved: return\ADANIPORTS.csv
Processed and saved: return\ASIANPAINT.csv
Processed and saved: return\AXISBANK.csv
Processed and saved: return\BAJAJ-AUTO.csv
Processed and saved: return\BAJAJFINSV.csv
Processed and saved: return\BAJFINANCE.csv
Processed and saved: return\BHARTIARTL.csv
Processed and saved: return\BPCL.csv
Processed and saved: return\BRITANNIA.csv
Processed and saved: return\CIPLA.csv
Processed and saved: return\COALINDIA.csv
Processed and saved: return\DRREDDY.csv
Processed and saved: return\EICHERMOT.csv
Processed and saved: return\GAIL.csv
Processed and saved: return\GRASIM.csv
Processed and saved: return\HCLTECH.csv
Processed and saved: return\HDFC.csv
Processed and saved: return\HDFCBANK.csv
Processed and saved: return\HEROMOTOCO.csv
Processed and saved: return\HINDALCO.csv
Processed and saved: return\HINDUNILVR.csv
Processed and saved: return\ICICIBANK.csv
Processed and saved: return\INDUSINDBK.csv
Processed and saved: return\INFY.csv
Processed and sav

In [44]:
import os

# Directory containing the files
directory = "return"  # Replace with the actual directory path

# List to store file names without extensions
file_names = []

# Iterate over each file in the directory
for filename in os.listdir(directory):
    # Check if it's a file (not a directory)
    if os.path.isfile(os.path.join(directory, filename)):
        # Split the file name and extension, and store the file name without extension
        name, _ = os.path.splitext(filename)
        file_names.append(name)

# Print the list of file names without extensions
for name in file_names:
    print(name)

# file_names


ADANIPORTS
ASIANPAINT
AXISBANK
BAJAJ-AUTO
BAJAJFINSV
BAJFINANCE
BHARTIARTL
BPCL
BRITANNIA
CIPLA
COALINDIA
DRREDDY
EICHERMOT
GAIL
GRASIM
HCLTECH
HDFC
HDFCBANK
HEROMOTOCO
HINDALCO
HINDUNILVR
ICICIBANK
INDUSINDBK
INFY
IOC
ITC
JSWSTEEL
KOTAKBANK
LT
MARUTI
MM
NESTLEIND
NIFTY50_all
NTPC
ONGC
POWERGRID
RELIANCE
SBIN
SHREECEM
SUNPHARMA
TATAMOTORS
TATASTEEL
TCS
TECHM
TITAN
ULTRACEMCO
UPL
VEDL
WIPRO
ZEEL


In [45]:
import os
import pandas as pd

# Directory containing the CSV files
directory = "return"  # Replace with the actual directory path

# Dictionary to store the shape of each DataFrame
file_shapes = {}

# Iterate over each file in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Construct full file path
        file_path = os.path.join(directory, filename)
        
        try:
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)
            
            # Get the file name without extension
            file_name_without_ext = os.path.splitext(filename)[0]
            
            # Store the shape (rows, columns) of the DataFrame
            file_shapes[file_name_without_ext] = df.shape
        
        except Exception as e:
            print(f"Error processing {filename}: {e}")

# Print the dictionary of file shapes
print(file_shapes)


{'ADANIPORTS': (496, 2), 'ASIANPAINT': (496, 2), 'AXISBANK': (496, 2), 'BAJAJ-AUTO': (496, 2), 'BAJAJFINSV': (496, 2), 'BAJFINANCE': (496, 2), 'BHARTIARTL': (496, 2), 'BPCL': (496, 2), 'BRITANNIA': (496, 2), 'CIPLA': (496, 2), 'COALINDIA': (496, 2), 'DRREDDY': (496, 2), 'EICHERMOT': (496, 2), 'GAIL': (496, 2), 'GRASIM': (496, 2), 'HCLTECH': (496, 2), 'HDFC': (496, 2), 'HDFCBANK': (496, 2), 'HEROMOTOCO': (496, 2), 'HINDALCO': (496, 2), 'HINDUNILVR': (496, 2), 'ICICIBANK': (496, 2), 'INDUSINDBK': (496, 2), 'INFY': (496, 2), 'IOC': (496, 2), 'ITC': (496, 2), 'JSWSTEEL': (496, 2), 'KOTAKBANK': (496, 2), 'LT': (496, 2), 'MARUTI': (496, 2), 'MM': (496, 2), 'NESTLEIND': (496, 2), 'NIFTY50_all': (24352, 2), 'NTPC': (496, 2), 'ONGC': (496, 2), 'POWERGRID': (496, 2), 'RELIANCE': (496, 2), 'SBIN': (496, 2), 'SHREECEM': (496, 2), 'SUNPHARMA': (496, 2), 'TATAMOTORS': (496, 2), 'TATASTEEL': (496, 2), 'TCS': (496, 2), 'TECHM': (496, 2), 'TITAN': (496, 2), 'ULTRACEMCO': (496, 2), 'UPL': (496, 2), 'VED

In [46]:
import pandas as pd
import os

# Set the directory containing the CSV files
input_directory = 'selected'  # Replace with your actual directory path

# List all CSV files in the directory
files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

# Initialize a list to store the "Return" columns
return_columns = []

# Loop through each file and extract the "Return" column
for file in files:
    file_path = os.path.join(input_directory, file)
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        
        # Check if the "Return" column exists
        if 'Return' in df.columns:
            # Append the "Return" column to the list
            return_columns.append(df[['Return']])
        else:
            print(f"Warning: 'Return' column not found in {file}")
    except Exception as e:
        print(f"Error reading {file}: {e}")

# Concatenate the "Return" columns horizontally
combined_df = pd.concat(return_columns, axis=1)

# Print the shape of the combined DataFrame
print(f"Shape of combined DataFrame: {combined_df.shape}")

# Save the combined DataFrame to a new CSV file (optional)
output_file = 'selected/combined_returns.csv'
combined_df.to_csv(output_file, index=False)


Shape of combined DataFrame: (496, 10)
