In [15]:
import pandas as pd
import numpy as np

# Load and preprocess the dataset
file_path = "C:\\Users\\Ankit Rai\\Downloads\\nse_all_stock_data (1).csv"
data = pd.read_csv(file_path)

# Convert 'Date' to datetime format and filter for 2021-2024
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
filtered_data = data[(data['Date'] >= '2021-01-01') & (data['Date'] <= '2024-12-31')]

# Check if the DataFrame is empty after filtering
if filtered_data.empty:
    raise ValueError("Filtered data is empty. Please check the input file and date column formatting.")

# Ensure all columns except 'Date' are numeric
for col in filtered_data.columns[1:]:
    filtered_data[col] = pd.to_numeric(filtered_data[col], errors='coerce')

# Forward fill missing data
filtered_data = filtered_data.fillna(method='ffill')

# Calculate a simple market index (daily average price of all stocks)
filtered_data['Market_Index'] = filtered_data.iloc[:, 1:].mean(axis=1)

# Calculate hourly market trends and add direction
def simulate_hourly_market_trends(data):
    hourly_data = []
    timestamps = []
    directions = []
    
    for i in range(len(data)):
        row = data.iloc[i]
        for hour in range(24):
            if i > 0:  # Calculate hourly change only if not the first day
                prev_row = data.iloc[i - 1]
                daily_change = (row['Market_Index'] - prev_row['Market_Index']) / 24
                hourly_market_index = prev_row['Market_Index'] + daily_change * hour
            else:
                hourly_market_index = row['Market_Index']
            
            # Add direction
            if len(hourly_data) > 0:
                previous_hour_value = hourly_data[-1]
                direction = "Increased" if hourly_market_index > previous_hour_value else "Decreased"
            else:
                direction = "No Change"  # For the first value
            
            hourly_data.append(hourly_market_index)
            timestamps.append(row['Date'] + pd.Timedelta(hours=hour))
            directions.append(direction)
    
    hourly_df = pd.DataFrame({
        'Timestamp': timestamps,
        'Hourly_Market_Index': hourly_data,
        'Direction': directions
    })
    return hourly_df

# Apply simulation to calculate hourly market trends
hourly_trends = simulate_hourly_market_trends(filtered_data)

# Save the result
hourly_trends.to_csv("hourly_market_trends_with_directions.csv", index=False)
print("Hourly market trends saved as 'hourly_market_trends_with_directions.csv'")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[col] = pd.to_numeric(filtered_data[col], errors='coerce')


Hourly market trends saved as 'hourly_market_trends_with_directions.csv'


In [16]:
hourly_trends

Unnamed: 0,Timestamp,Hourly_Market_Index,Direction
0,2021-01-01 00:00:00,689.482551,No Change
1,2021-01-01 01:00:00,689.482551,Decreased
2,2021-01-01 02:00:00,689.482551,Decreased
3,2021-01-01 03:00:00,689.482551,Decreased
4,2021-01-01 04:00:00,689.482551,Decreased
...,...,...,...
20779,2024-07-05 19:00:00,1412.416915,Increased
20780,2024-07-05 20:00:00,1412.626295,Increased
20781,2024-07-05 21:00:00,1412.835675,Increased
20782,2024-07-05 22:00:00,1413.045055,Increased
