In [3]:
%pip install matplotlib seaborn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.


In [5]:
# Data file locations, written relative to the project root.
# NOTE: as plain relative strings these resolve against the current working
# directory when opened; a later cell reassigns both names as absolute paths.
raw_data_path = "data/raw/COMBINED-GROUND DATA.xlsx"
processed_data_path = "data/processed/CPCB_Ground_Daily_Filled.csv"

In [14]:
import os
import pandas as pd

# === UNIVERSAL PATH SETUP (works in .py or .ipynb) ===
def get_project_root(start=None, markers=(os.path.join("data", "raw"),
                                          os.path.join("data", "processed"))):
    """Locate the project root directory.

    The search begins at ``start`` and walks up through its ancestors,
    returning the first directory that contains any of the ``markers``
    sub-directories. This gives the right answer whether the code runs from
    the project root itself, from a ``notebooks/`` folder, or from a more
    deeply nested folder.

    Args:
        start: Directory to begin the search from. When ``None`` (default),
            the directory containing this script (``__file__``) is used, or
            the current working directory when ``__file__`` is undefined
            (e.g. inside a Jupyter notebook).
        markers: Relative sub-directory paths whose presence identifies the
            project root.

    Returns:
        Absolute path of the nearest directory (``start`` or one of its
        ancestors) containing a marker. If no marker is found anywhere, the
        parent of the start directory is returned, which is what this
        function returned before the marker search was added.
    """
    if start is None:
        try:
            # Works when running as a .py script
            start = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is not defined inside a Jupyter notebook
            start = os.getcwd()
    start = os.path.abspath(start)

    candidate = start
    while True:
        if any(os.path.isdir(os.path.join(candidate, marker)) for marker in markers):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Reached the filesystem root without finding a marker.
            break
        candidate = parent

    # Fallback: assume the start directory sits one level below the root.
    return os.path.dirname(start)

# Anchor both data files to the detected project root so the paths do not
# depend on where the kernel or script was started.
root_dir = get_project_root()
raw_data_path = os.path.join(root_dir, "data", "raw", "COMBINED-GROUND DATA.xlsx")
processed_data_path = os.path.join(root_dir, "data", "processed", "CPCB_Ground_Daily_Filled.csv")

# === DEBUG INFO ===
# Echo each resolved path and whether it exists, one "label value" line apiece.
print("üîç Checking file paths:")
for label, value in (
    ("Project root:", root_dir),
    ("Raw data path:", raw_data_path),
    ("Processed data path:", processed_data_path),
    ("Exists raw?", os.path.exists(raw_data_path)),
    ("Exists processed?", os.path.exists(processed_data_path)),
):
    print(label, value)

# === LOAD DATA ===
# Source priority: raw file first, processed CSV second, empty frame last.
# Each supported raw-file extension maps to the pandas reader that parses it.
readers_by_ext = {
    '.xlsx': pd.read_excel,
    '.xls': pd.read_excel,
    '.csv': pd.read_csv,
}

if os.path.exists(raw_data_path):
    ext = os.path.splitext(raw_data_path)[1].lower()
    if ext not in readers_by_ext:
        raise ValueError(f"Unsupported file extension: {ext} for file {raw_data_path}")
    cpcb_df = readers_by_ext[ext](raw_data_path)
    print(f"‚úÖ Data loaded from {raw_data_path}. Shape: {cpcb_df.shape}")

elif os.path.exists(processed_data_path):
    # Raw file is missing: fall back to the processed CSV.
    print(f"‚ö†Ô∏è Raw data file not found at {raw_data_path}. Loading processed data instead...")
    cpcb_df = pd.read_csv(processed_data_path)
    print(f"‚úÖ Processed data loaded. Shape: {cpcb_df.shape}")

else:
    # Nothing on disk: warn, but still define cpcb_df (as an empty frame).
    print(f"‚ùå Warning: Neither raw data ({raw_data_path}) nor processed data ({processed_data_path}) were found.")
    print("An empty DataFrame 'cpcb_df' has been created. Please check your file paths or provide the data files.")
    cpcb_df = pd.DataFrame()

# === SAMPLE OUTPUT ===
# Heading and the first rows, emitted as plain text on separate lines.
print("\nSample of CPCB DataFrame:", cpcb_df.head(), sep="\n")


üîç Checking file paths:
Project root: c:\Users\RAH\Desktop\Delhi-NCR-AQI-Assessment
Raw data path: c:\Users\RAH\Desktop\Delhi-NCR-AQI-Assessment\data\raw\COMBINED-GROUND DATA.xlsx
Processed data path: c:\Users\RAH\Desktop\Delhi-NCR-AQI-Assessment\data\processed\CPCB_Ground_Daily_Filled.csv
Exists raw? True
Exists processed? True
‚úÖ Data loaded from c:\Users\RAH\Desktop\Delhi-NCR-AQI-Assessment\data\raw\COMBINED-GROUND DATA.xlsx. Shape: (16937, 9)

Sample of CPCB DataFrame:
            Column1           Column2 Column3 Column4 Column5 Column6 Column7  \
0         From Date           To Date   PM2.5    PM10     NO2     SO2      CO   
1  01-01-2020 00:00  02-01-2020 00:00  367.38  449.58   54.76   10.76    3.65   
2  02-01-2020 00:00  03-01-2020 00:00  360.34  463.33   61.22    21.3    4.13   
3  03-01-2020 00:00  04-01-2020 00:00  394.86  500.24   46.96   22.12    2.93   
4  04-01-2020 00:00  05-01-2020 00:00  204.94  359.77   90.79    4.96    0.92   

  Column8                    Col

In [15]:
# Structural summary of the loaded frame: index range, per-column non-null
# counts, dtypes and memory usage.
print()
print("--- Basic Info ---")
cpcb_df.info()


--- Basic Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16937 entries, 0 to 16936
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Column1  16937 non-null  object
 1   Column2  16784 non-null  object
 2   Column3  15919 non-null  object
 3   Column4  14267 non-null  object
 4   Column5  15906 non-null  object
 5   Column6  15867 non-null  object
 6   Column7  15569 non-null  object
 7   Column8  15958 non-null  object
 8   Column9  14820 non-null  object
dtypes: object(9)
memory usage: 1.2+ MB
