In [25]:
# Cell 1: Import libraries and setup
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os

# Set plotting style
# Select the 'seaborn-v0_8' matplotlib style sheet and make "husl" the default
# seaborn colour palette.
# NOTE(review): the charts visible in this notebook are built with Plotly, which
# presumably does not pick up these matplotlib/seaborn settings - confirm they
# are still needed.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Visible confirmation that the setup cell ran to completion.
print("Libraries imported successfully!")

Libraries imported successfully!


In [26]:
# Cell 2: Load your CSV files
# Both CSVs are read from DATA_DIR; edit that one line if your copy of the
# project lives somewhere else.
# The path is a raw string (r'...') so the Windows backslashes stay literal.
# Without the r prefix, sequences such as '\P', '\h', '\F' and '\W' are invalid
# escape sequences and Python emits a warning for every path literal.
DATA_DIR = r'D:\Projects\Python projects\hackathon'

finance_df = pd.read_csv(os.path.join(DATA_DIR, 'FinanceData_combined.csv'))
weather_df = pd.read_csv(os.path.join(DATA_DIR, 'WeatherData_combined.csv'))

# Quick sanity check of what was loaded: (rows, columns) per dataset
print("Finance Data Shape:", finance_df.shape)
print("Weather Data Shape:", weather_df.shape)

Finance Data Shape: (200, 8)
Weather Data Shape: (2, 18)


  finance_df = pd.read_csv('D:\Projects\Python projects\hackathon\FinanceData_combined.csv')
  weather_df = pd.read_csv('D:\Projects\Python projects\hackathon\WeatherData_combined.csv')


In [13]:
# Cell 3: Data preprocessing for Finance
# Parse the timestamps, then put the rows in chronological order
finance_df['Datetime'] = finance_df['Datetime'].pipe(pd.to_datetime)
finance_df = finance_df.sort_values(by='Datetime')

# Derived columns (added in this order):
#   Daily_Return - percentage change of Close versus the previous row of the same Symbol
#   Price_Range  - High minus Low for the row
#   Volume_MA    - 5-row rolling mean of Volume within each Symbol
# The grouped rolling result is indexed by (Symbol, original index); dropping
# the Symbol level lets it align back onto the frame's own index.
finance_df = finance_df.assign(
    Daily_Return=finance_df.groupby('Symbol')['Close'].pct_change().mul(100),
    Price_Range=finance_df['High'].sub(finance_df['Low']),
    Volume_MA=finance_df.groupby('Symbol')['Volume'].rolling(5).mean().droplevel(0),
)

print("Finance data preprocessing completed!")
finance_df.head()

Finance data preprocessing completed!


Unnamed: 0,Symbol,Datetime,Open,High,Low,Close,Volume,fetched_at_utc,Daily_Return,Price_Range,Volume_MA
99,MSFT,2025-07-03,493.81,500.13,493.44,498.84,13984829,2025-11-22 19:37:22.568050,,6.69,
98,MSFT,2025-07-07,497.38,498.75,495.225,497.72,13981605,2025-11-22 19:37:22.567835,-0.224521,3.525,
97,MSFT,2025-07-08,497.24,498.2,494.11,496.62,11846586,2025-11-22 19:37:22.567599,-0.221008,4.09,
96,MSFT,2025-07-09,500.3,506.78,499.74,503.51,18659538,2025-11-22 19:37:22.567372,1.387379,7.04,
95,MSFT,2025-07-10,503.05,504.44,497.75,501.48,16498740,2025-11-22 19:37:22.567117,-0.40317,6.69,14994259.6


In [27]:
# Timestamp-like weather columns; any that are absent from the frame are skipped
datetime_cols = ['Time_of_Record', 'Sunrise', 'Sunset', 'fetched_at_utc']
for col in datetime_cols:
    if col not in weather_df.columns:
        continue
    weather_df[col] = weather_df[col].pipe(pd.to_datetime)

# Convert temperatures from Kelvin to Celsius
def kelvin_to_celsius(temp_k):
    """Return ``temp_k`` expressed in degrees Celsius.

    Args:
        temp_k: Temperature in Kelvin. Any operand that supports subtracting
            a float works.

    Returns:
        ``temp_k`` minus 273.15.
    """
    kelvin_at_zero_celsius = 273.15
    celsius = temp_k - kelvin_at_zero_celsius
    return celsius

# Apply temperature conversion
# The '*_C' columns are treated as raw Kelvin readings by this cell. Real air
# temperatures stay well above 150 when expressed in Kelvin and well below 150
# when expressed in Celsius, so a column whose minimum exceeds
# KELVIN_MIN_PLAUSIBLE has not been converted yet. The check keeps the cell
# idempotent: running it twice no longer subtracts 273.15 a second time.
KELVIN_MIN_PLAUSIBLE = 150
temp_columns = ['Temp_C', 'FeelsLike_C', 'MinTemp_C', 'MaxTemp_C']
for col in temp_columns:
    if col in weather_df.columns and weather_df[col].min() > KELVIN_MIN_PLAUSIBLE:
        # kelvin_to_celsius is plain arithmetic, so it is applied to the whole
        # column at once instead of element by element.
        weather_df[col] = kelvin_to_celsius(weather_df[col])

# Update Fahrenheit columns based on converted Celsius
# Guarded the same way as the Celsius loop so a missing column is skipped
# rather than raising KeyError.
fahrenheit_columns = {
    'Temp_C': 'Temp_F',
    'FeelsLike_C': 'FeelsLike_F',
    'MinTemp_C': 'MinTemp_F',
    'MaxTemp_C': 'MaxTemp_F',
}
for celsius_col, fahrenheit_col in fahrenheit_columns.items():
    if celsius_col in weather_df.columns:
        weather_df[fahrenheit_col] = weather_df[celsius_col] * 9 / 5 + 32

# Add day/night indicator: True when the record time lies strictly between
# that row's sunrise and sunset
weather_df['Is_Daytime'] = (
    (weather_df['Time_of_Record'] > weather_df['Sunrise'])
    & (weather_df['Time_of_Record'] < weather_df['Sunset'])
)

print("Weather data preprocessing completed!")
# Count distinct cities rather than rows, so repeated records for one city are
# not reported as extra cities.
print(f"Number of cities: {weather_df['City'].nunique()}")
# str() keeps the join from raising TypeError if a city name is missing (NaN).
print(f"Cities: {', '.join(map(str, weather_df['City'].unique()))}")
print("\nTemperature ranges after conversion:")
print(f"Temp_C: {weather_df['Temp_C'].min():.1f}°C to {weather_df['Temp_C'].max():.1f}°C")
print(f"Temp_F: {weather_df['Temp_F'].min():.1f}°F to {weather_df['Temp_F'].max():.1f}°F")

weather_df[['City', 'Country', 'Description', 'Temp_C', 'Temp_F', 'Humidity', 'WindSpeed']].head()

Weather data preprocessing completed!
Number of cities: 2
Cities: London, Karachi

Temperature ranges after conversion:
Temp_C: 7.7°C to 17.9°C
Temp_F: 45.9°F to 64.2°F


Unnamed: 0,City,Country,Description,Temp_C,Temp_F,Humidity,WindSpeed
0,London,GB,light rain,7.72,45.896,92,4.12
1,Karachi,PK,smoke,17.9,64.22,77,2.57


In [28]:
# Cell 5: Finance Visualization 1 - Stock Price Trends
def plot_stock_comparison(df):
    """Overlay the closing price of every ticker in ``df`` on one Plotly chart.

    Args:
        df: DataFrame providing ``Symbol``, ``Datetime`` and ``Close`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = go.Figure()

    # One line trace per distinct symbol, in the order the symbols first appear.
    for ticker in df['Symbol'].unique():
        ticker_rows = df.loc[df['Symbol'] == ticker]
        price_line = go.Scatter(
            x=ticker_rows['Datetime'],
            y=ticker_rows['Close'],
            name=ticker,
            mode='lines',
        )
        fig.add_trace(price_line)

    layout_options = {
        'title': 'Stock Price Comparison',
        'xaxis_title': 'Date',
        'yaxis_title': 'Closing Price (USD)',
        'hovermode': 'x unified',
    }
    fig.update_layout(**layout_options)
    fig.show()

# Show the closing-price comparison for the finance data loaded in this notebook.
plot_stock_comparison(finance_df)

In [29]:
# Cell 6: Finance Visualization 2 - Candlestick Charts
def plot_interactive_candlestick(df, symbol):
    """Display a candlestick chart for a single ticker.

    Args:
        df: DataFrame providing ``Symbol``, ``Datetime``, ``Open``, ``High``,
            ``Low`` and ``Close`` columns.
        symbol: Value of the ``Symbol`` column whose rows are charted.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Keep only the requested ticker and order its rows by timestamp.
    is_requested = df['Symbol'] == symbol
    ordered = df.loc[is_requested].sort_values(by='Datetime')

    candles = go.Candlestick(
        x=ordered['Datetime'],
        open=ordered['Open'],
        high=ordered['High'],
        low=ordered['Low'],
        close=ordered['Close'],
        name=symbol,
    )
    fig = go.Figure(data=[candles])

    layout_options = {
        'title': f'{symbol} Candlestick Chart',
        'xaxis_title': 'Date',
        'yaxis_title': 'Price (USD)',
        'xaxis_rangeslider_visible': False,  # hide the range slider under the x axis
        'height': 500,
    }
    fig.update_layout(**layout_options)
    fig.show()

# Plot for each symbol
# One separate candlestick figure is shown per distinct value of the 'Symbol' column.
for symbol in finance_df['Symbol'].unique():
    plot_interactive_candlestick(finance_df, symbol)

In [30]:
# Cell 7: Finance Visualization 3 - Volume Analysis
def plot_volume_analysis(df):
    """Display grouped bars of trading volume, one bar series per ticker.

    Args:
        df: DataFrame providing ``Symbol``, ``Datetime`` and ``Volume`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = go.Figure()

    # One semi-transparent bar series per distinct symbol.
    for ticker in df['Symbol'].unique():
        ticker_rows = df.loc[df['Symbol'] == ticker]
        volume_bars = go.Bar(
            x=ticker_rows['Datetime'],
            y=ticker_rows['Volume'],
            name=f'{ticker} Volume',
            opacity=0.6,
        )
        fig.add_trace(volume_bars)

    layout_options = {
        'title': 'Trading Volume Comparison',
        'xaxis_title': 'Date',
        'yaxis_title': 'Volume',
        'barmode': 'group',  # bars for the same date sit side by side
    }
    fig.update_layout(**layout_options)
    fig.show()

# Show the grouped volume bars for the finance data loaded in this notebook.
plot_volume_analysis(finance_df)

In [37]:
import importlib.util
import sys

# Specify the path to your module
module_path = r"D:\Projects\Python projects\hackathon\weather_finance_etl\analysis\finance_analysis.py"
module_name = "finance_analysis"

# Load the module
spec = importlib.util.spec_from_file_location(module_name, module_path)
# spec_from_file_location returns None when no loader can be determined for
# the path; fail with a clear message instead of an AttributeError on None.
if spec is None or spec.loader is None:
    raise ImportError(f"Cannot create an import spec for {module_name!r} from {module_path}")
finance_analysis = importlib.util.module_from_spec(spec)
# Register the module before executing it, as the importlib recipe for
# importing a source file directly does, so code that looks the module up by
# name while it is being executed can find it.
sys.modules[module_name] = finance_analysis
try:
    spec.loader.exec_module(finance_analysis)
except Exception:
    # Do not leave a half-initialised module registered if its code failed.
    sys.modules.pop(module_name, None)
    raise

# Now use the function
plot_stock = finance_analysis.plot_stock

# Use the function: plot_stock is expected to return a figure object, which is
# then displayed once per distinct symbol.
for symbol in finance_df['Symbol'].unique():
    print(f"📈 Generating candlestick chart for {symbol}...")
    fig = plot_stock(finance_df, symbol)
    fig.show()

📈 Generating candlestick chart for MSFT...


📈 Generating candlestick chart for GOOGL...


In [38]:
# Cell 9: Weather Visualization 1 - Temperature Comparison
def plot_temperature_dashboard(df):
    """Display a Plotly Express scatter of temperature per city.

    Marker size is taken from ``Humidity`` and marker colour from ``Country``;
    ``Description`` and ``FeelsLike_C`` are added to the hover tooltip.

    Args:
        df: DataFrame providing the ``City``, ``Temp_C``, ``Humidity``,
            ``Country``, ``Description`` and ``FeelsLike_C`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    chart_options = {
        'x': 'City',
        'y': 'Temp_C',
        'size': 'Humidity',
        'color': 'Country',
        'hover_data': ['Description', 'FeelsLike_C'],
        'title': 'Temperature by City with Humidity Indicator',
    }
    fig = px.scatter(df, **chart_options)
    fig.show()

# Show the temperature-by-city scatter for the weather data loaded in this notebook.
plot_temperature_dashboard(weather_df)

In [39]:
# Cell 10: Weather Visualization 2 - Weather Metrics Radar
def plot_weather_radar_comparison(df):
    """Display a radar chart comparing four weather metrics across cities.

    Each metric is min-max scaled to 0-100 over the whole frame, and the first
    row found for each city is plotted as one polygon.

    Args:
        df: DataFrame providing ``City`` plus the numeric ``Temp_C``,
            ``Humidity``, ``Pressure`` and ``WindSpeed`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    metrics = ['Temp_C', 'Humidity', 'Pressure', 'WindSpeed']

    # The scaling bounds do not depend on the city, so compute them once.
    lows = df[metrics].min()
    spans = df[metrics].max() - lows

    fig = go.Figure()

    for city in df['City'].unique():
        city_data = df[df['City'] == city].iloc[0]

        # Normalize values for radar chart. A metric with no spread (every
        # row identical, or only one row) would divide by zero and yield NaN,
        # so it is drawn at mid-scale instead.
        normalized_values = [
            float((city_data[metric] - lows[metric]) / spans[metric] * 100)
            if spans[metric] > 0 else 50.0
            for metric in metrics
        ]

        fig.add_trace(go.Scatterpolar(
            r=normalized_values,
            theta=metrics,
            fill='toself',
            name=city
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 100])
        ),
        title='Weather Metrics Radar Comparison',
        showlegend=True
    )
    fig.show()

# Show the radar comparison for the weather data loaded in this notebook.
plot_weather_radar_comparison(weather_df)

In [40]:
# Cell 11: Weather Visualization 3 - Correlation Heatmap
def plot_weather_correlations(df):
    """Display a heatmap of the pairwise correlations between weather metrics.

    Args:
        df: DataFrame providing the ``Temp_C``, ``FeelsLike_C``, ``Pressure``,
            ``Humidity`` and ``WindSpeed`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Columns that take part in the correlation matrix
    numeric_cols = ['Temp_C', 'FeelsLike_C', 'Pressure', 'Humidity', 'WindSpeed']
    correlation_matrix = df.loc[:, numeric_cols].corr()

    heatmap_options = {
        'title': 'Weather Metrics Correlation Heatmap',
        'color_continuous_scale': 'RdBu_r',
        'aspect': 'auto',
        'text_auto': True,  # print the coefficient inside each cell
    }
    fig = px.imshow(correlation_matrix, **heatmap_options)
    fig.show()

# Show the correlation heatmap for the weather data loaded in this notebook.
plot_weather_correlations(weather_df)

In [41]:
# Cell 12: Weather Visualization 4 - Pressure vs Temperature
def plot_pressure_temp_relationship(df):
    """Display a scatter of pressure against temperature, coloured by city.

    Marker size is taken from ``Humidity``; ``Description`` and ``WindSpeed``
    are added to the hover tooltip, and a ``'lowess'`` trendline is requested
    from Plotly Express.

    Args:
        df: DataFrame providing the ``Temp_C``, ``Pressure``, ``City``,
            ``Humidity``, ``Description`` and ``WindSpeed`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    chart_options = {
        'x': 'Temp_C',
        'y': 'Pressure',
        'color': 'City',
        'size': 'Humidity',
        'hover_data': ['Description', 'WindSpeed'],
        'title': 'Pressure vs Temperature Relationship',
        'trendline': 'lowess',
    }
    fig = px.scatter(df, **chart_options)
    fig.show()

# Show pressure against temperature for the weather data loaded in this notebook.
plot_pressure_temp_relationship(weather_df)

In [None]:
# Cell 13: Combined Analysis - Time Series for both datasets
def plot_combined_analysis(finance_df, weather_df):
    """Display normalized closing prices and normalized temperature together.

    Every symbol's ``Close`` series and the ``Temp_C`` column are min-max
    scaled to the 0-1 range independently, then drawn on one shared figure so
    series with very different magnitudes can be compared visually.

    Args:
        finance_df: DataFrame providing ``Symbol``, ``Datetime`` and ``Close``
            columns. An empty frame contributes no traces.
        weather_df: DataFrame providing ``Time_of_Record`` and ``Temp_C``
            columns. An empty frame contributes no traces.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    def _min_max_scale(series):
        """Scale ``series`` to 0-1; a series with no spread maps to 0.5."""
        low = series.min()
        span = series.max() - low
        if span > 0:
            return (series - low) / span
        # Constant (or single-value) series: dividing by the zero span would
        # produce NaN and an empty trace, so draw a flat mid-scale line.
        return (series - low) + 0.5

    # A single figure shared by all traces
    fig = go.Figure()

    # Add finance data (normalized), one dotted line per symbol
    if not finance_df.empty:
        for symbol in finance_df['Symbol'].unique():
            symbol_df = finance_df[finance_df['Symbol'] == symbol]
            fig.add_trace(go.Scatter(
                x=symbol_df['Datetime'],
                y=_min_max_scale(symbol_df['Close']),
                name=f'{symbol} (Normalized)',
                line=dict(dash='dot')
            ))

    # Add weather data (normalized) as a single red line over all rows.
    # NOTE(review): the rows are not split by city, so if the frame holds
    # several cities this line connects readings from different places.
    if not weather_df.empty:
        fig.add_trace(go.Scatter(
            x=weather_df['Time_of_Record'],
            y=_min_max_scale(weather_df['Temp_C']),
            name='Temperature (Normalized)',
            line=dict(color='red')
        ))

    fig.update_layout(
        title='Combined Analysis: Stock Prices vs Temperature (Normalized)',
        xaxis_title='Date',
        yaxis_title='Normalized Values',
        hovermode='x unified'
    )
    fig.show()

# Show the combined normalized chart for the finance and weather data loaded in this notebook.
plot_combined_analysis(finance_df, weather_df)

In [59]:
# Cell 14: Summary Statistics
print("📊 SUMMARY STATISTICS")
print("=" * 60)

# Per-symbol record count, mean closing price and summed volume
print("\n📈 FINANCE DATA:")
print("-" * 30)
if finance_df.empty:
    print("  No finance data available")
else:
    for symbol in finance_df['Symbol'].unique():
        symbol_df = finance_df[finance_df['Symbol'] == symbol]
        print(f"\n{symbol}:")
        print(f"  Records: {len(symbol_df)}")
        print(f"  Avg Close: ${symbol_df['Close'].mean():.2f}")
        print(f"  Total Volume: {symbol_df['Volume'].sum():,}")

# Distinct cities/countries/conditions plus temperature and humidity aggregates
print("\n🌤️ WEATHER DATA:")
print("-" * 30)
if weather_df.empty:
    print("  No weather data available")
else:
    # Every joined value goes through str() - the Country line already did
    # this - so a missing (NaN) entry is printed instead of making
    # str.join raise TypeError.
    print(f"Cities: {', '.join(map(str, weather_df['City'].unique()))}")
    print(f"Countries: {', '.join(map(str, weather_df['Country'].unique()))}")
    print("\nTemperature Stats:")
    print(f"  Average: {weather_df['Temp_C'].mean():.1f}°C")
    print(f"  Range: {weather_df['Temp_C'].min():.1f}°C to {weather_df['Temp_C'].max():.1f}°C")
    print(f"  Average Humidity: {weather_df['Humidity'].mean():.1f}%")
    print(f"  Weather Conditions: {', '.join(map(str, weather_df['Description'].unique()))}")

📊 SUMMARY STATISTICS

📈 FINANCE DATA:
------------------------------

MSFT:
  Records: 100
  Avg Close: $511.64
  Total Volume: 2,084,460,420

GOOGL:
  Records: 100
  Avg Close: $231.81
  Total Volume: 3,573,836,498

🌤️ WEATHER DATA:
------------------------------
Cities: London, Karachi
Countries: GB, PK

Temperature Stats:
  Average: 12.8°C
  Range: 7.7°C to 17.9°C
  Average Humidity: 84.5%
  Weather Conditions: light rain, smoke
