In [12]:
from openpyxl import load_workbook
import os

def get_sheet_names(file_path):
    """Return the worksheet names of an Excel workbook.

    Parameters
    ----------
    file_path : str or path-like
        Path of the workbook to inspect; passed straight to
        ``openpyxl.load_workbook``.

    Returns
    -------
    list
        The value of the workbook's ``sheetnames`` attribute.

    Raises
    ------
    Exception
        Anything raised by ``load_workbook`` (missing file, not a valid
        workbook, ...) propagates to the caller unchanged.
    """
    workbook = load_workbook(filename=file_path, read_only=True)
    try:
        return workbook.sheetnames
    finally:
        # A read-only workbook keeps the underlying file open until it is
        # closed explicitly, so release it even if reading the names fails.
        workbook.close()

# Example: read all Excel files in the current directory and print their sheet names.
# The listing is sorted so the output order is the same on every run
# (os.listdir() returns entries in arbitrary order).
for file in sorted(os.listdir()):
    if not file.endswith('.xlsx'):
        continue
    # Excel creates "~$<name>.xlsx" lock files while a workbook is open; they
    # are not real workbooks and would only produce a spurious read error.
    if file.startswith('~$'):
        continue
    print(f"File: {file}")
    try:
        sheet_names = get_sheet_names(file)
        print("Sheet names:", sheet_names)
    except Exception as e:
        # Best-effort scan: report the unreadable file and keep going.
        print(f"Could not read {file}: {e}")

# Load the district workbook into a DataFrame.
import pandas as pd

file_path = 'data_districts.xlsx'

# Skip the first 15 rows of the sheet; the first remaining row supplies the
# column names. sheet_name=0 (the pandas default) means only the first sheet
# of the workbook is read here.
data = pd.read_excel(io=file_path, sheet_name=0, skiprows=15)




File: data_districts.xlsx
Sheet names: ['Dehradun', 'Haridwar', 'Tehri', 'Pauri', 'Uttarakashi', 'Chamoli', 'Rudraprayag', 'Udham singh nagar', 'Nainital', 'Almora', 'Champawat', 'Bageshwar', 'Pithoragarh']
File: malaria.xlsx
Sheet names: ['Bageshwar', 'almora', 'Chamoli', 'Champawat', 'Dehradun', 'Garhwal ', 'Hardwar', 'Nainital', 'Pithoragarh', 'Rudraprayag', 'Tehri Garhwal', 'Udham Singh Nagar', 'Uttarkashi']


In [None]:
#strip different sheets for district wise data
# Builds `monthly_means`: {sheet name -> DataFrame with one row per calendar
# month}, holding the mean of every numeric column for that month, with 'Year'
# as the first column and 'MO' converted to the month name.
import matplotlib.pyplot as plt

# Month number -> month name. Identical for every sheet, so it is defined once
# instead of being rebuilt on each loop iteration.
month_map = {1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May', 6: 'June',
             7: 'July', 8: 'August', 9: 'September', 10: 'October', 11: 'November', 12: 'December'}

# sheet_name=None reads every sheet in one pass and returns a dict keyed by
# sheet name, so the workbook is opened a single time (and closed by pandas)
# instead of once per district plus a never-closed pd.ExcelFile handle.
all_sheets = pd.read_excel(file_path, sheet_name=None, skiprows=15)

#read name of each sheet
sheet_names = list(all_sheets)

monthly_means = {}
for sheet in sheet_names:
    df = all_sheets[sheet]
    # Discard columns PRECTOTCORR and T2M_RANGE at the beginning
    df = df.drop(columns=['PRECTOTCORR', 'T2M_RANGE'], errors='ignore')
    # Rename QV2M and RH2M columns
    df = df.rename(columns={'QV2M': 'Specific Humidity', 'RH2M': 'Relative Humidity'})
    df['Date'] = pd.to_datetime(dict(year=df['YEAR'], month=df['MO'], day=df['DY']))
    df['MonthStart'] = df['Date'].dt.to_period('M')
    # Group by MonthStart and take mean for each month
    df_monthly = df.groupby('MonthStart').mean(numeric_only=True).reset_index()
    # Keep a real timestamp per month for the x axis of the plot below.
    # Plotting against 'Year' alone would stack all twelve months of a year
    # on the same x position.
    month_starts = df_monthly['MonthStart'].dt.to_timestamp()
    # Keep only the year from MonthStart and rename to 'Year'
    df_monthly['Year'] = df_monthly['MonthStart'].dt.year
    df_monthly = df_monthly.drop(columns=['MonthStart', 'YEAR', 'DY'], errors='ignore')
    # Move 'Year' to the front, leaving the other columns in their existing order
    cols = ['Year'] + [col for col in df_monthly.columns if col != 'Year']
    df_monthly = df_monthly[cols]
    if 'MO' in df_monthly.columns:
        # 'MO' is constant within a month, so its mean is still the month number
        df_monthly['MO'] = df_monthly['MO'].map(month_map)
    monthly_means[sheet] = df_monthly

    # Print the first few rows for each district
    print(f"Monthly mean for district '{sheet}':")
    print(df_monthly.head())

    #plot the data for each district
    fig, ax = plt.subplots(figsize=(10, 5))
    # NOTE(review): the legend previously said Kelvin, but the monthly means
    # printed by this cell (about 9 in January) are consistent with degrees
    # Celsius - confirm against the units listed in the workbook header.
    ax.plot(month_starts, df_monthly['T2M'], marker='o', label='Mean Temperature (°C)')
    ax.set_title(f'Monthly Means for {sheet}')
    ax.set_xlabel('Year')
    ax.set_ylabel('Values')
    ax.legend()
    ax.grid()
    #fig.savefig(f'{sheet}_monthly_means.png')
    # Render the figure before releasing it; closing without show/savefig
    # discarded every plot that was built.
    plt.show()
    plt.close(fig)



Monthly mean for district 'Dehradun':
   Year        MO  Specific Humidity  Relative Humidity        T2M    T2M_MAX  \
0  1992   January           3.950968          48.841613   9.083871  16.162581   
1  1992  February           3.653103          45.224483   9.252414  16.864483   
2  1992     March           4.452581          39.366774  14.678710  22.217419   
3  1992     April           4.163000          25.766000  20.463000  28.606333   
4  1992       May           5.014839          24.709355  24.370000  32.347097   

     T2M_MIN  
0   4.435806  
1   3.713448  
2   8.774516  
3  13.079333  
4  16.524839  
Monthly mean for district 'Haridwar':
   Year        MO    T2M_MAX    T2M_MIN        T2M  Specific Humidity  \
0  1992   January  20.996129   8.474516  13.629677           4.444839   
1  1992  February  22.431379   8.848621  14.489655           4.280690   
2  1992     March  28.522581  14.344516  20.477097           4.989677   
3  1992     April  35.412000  19.358667  26.933000     

In [None]:
import matplotlib.pyplot as plt

# One figure per district and per year (2010 onwards), with the monthly means
# of each available series drawn against the month name.
#
# (column, marker, legend label) for every series that is drawn when present.
# NOTE(review): the legend units were '(K)' and '(kg/kg)', which do not fit the
# magnitudes printed for these columns (temperature about 9-24, specific
# humidity about 4). The column names match NASA POWER daily exports, which
# report T2M in degrees Celsius and QV2M in g/kg - confirm against the
# workbook header.
series_to_plot = (
    ('T2M', 'o', 'Mean Temperature (°C)'),
    ('Specific Humidity', 'x', 'Specific Humidity (g/kg)'),
    ('Relative Humidity', 's', 'Relative Humidity (%)'),
)

for sheet, df_monthly in monthly_means.items():
    df_plot = df_monthly[df_monthly['Year'] >= 2010]
    if df_plot.empty:
        continue
    # sort=False keeps the years in their order of first appearance
    for year, df_year in df_plot.groupby('Year', sort=False):
        fig, ax = plt.subplots(figsize=(10, 5))
        for column, marker, label in series_to_plot:
            if column in df_year.columns:
                ax.plot(df_year['MO'], df_year[column], marker=marker, label=label)
        ax.set_title(f'{sheet} - Monthly Means - {year}')
        ax.set_xlabel('Month')
        ax.set_ylabel('Values')
        ax.legend()
        ax.grid()
        ax.tick_params(axis='x', rotation=45)
        fig.tight_layout()
        plt.show()
        # Many figures are created in this loop; close each one explicitly so
        # they do not accumulate in memory.
        plt.close(fig)