In [1]:
import pandas as pd

# Load the dataset
file_path = 'rainfall1627650742214.csv'
df = pd.read_csv(file_path)

# Display the first few rows of the dataframe
print("Initial Dataframe:")
print(df.head())

Initial Dataframe:
                 STATE_UT_NAME       DISTRICT    JAN   FEB    MAR    APR  \
0  ANDAMAN And NICOBAR ISLANDS        NICOBAR  107.3  57.9   65.2  117.0   
1  ANDAMAN And NICOBAR ISLANDS  SOUTH ANDAMAN   43.7  26.0   18.6   90.5   
2  ANDAMAN And NICOBAR ISLANDS  N & M ANDAMAN   32.7  15.9    8.6   53.4   
3            ARUNACHAL PRADESH          LOHIT   42.2  80.8  176.4  358.5   
4            ARUNACHAL PRADESH     EAST SIANG   33.3  79.5  105.9  216.5   

     MAY    JUN    JUL    AUG    SEP    OCT    NOV    DEC  ANNUAL  Jan-Feb  \
0  358.5  295.5  285.0  271.9  354.8  326.0  315.2  250.9  2805.2    165.2   
1  374.4  457.2  421.3  423.1  455.6  301.2  275.8  128.3  3015.7     69.7   
2  343.6  503.3  465.4  460.9  454.8  276.1  198.6  100.0  2913.3     48.6   
3  306.4  447.0  660.1  427.8  313.6  167.1   34.1   29.8  3043.8    123.0   
4  323.0  738.3  990.9  711.2  568.0  206.9   29.5   31.7  4034.7    112.8   

   Mar-May  Jun-Sep  Oct-Dec  
0    540.7   1207.2    8

In [3]:
# Step 2: Check for missing values and drop the corresponding rows
df.dropna(inplace=True)
print(df)

                   STATE_UT_NAME        DISTRICT    JAN   FEB    MAR    APR  \
0    ANDAMAN And NICOBAR ISLANDS         NICOBAR  107.3  57.9   65.2  117.0   
1    ANDAMAN And NICOBAR ISLANDS   SOUTH ANDAMAN   43.7  26.0   18.6   90.5   
2    ANDAMAN And NICOBAR ISLANDS   N & M ANDAMAN   32.7  15.9    8.6   53.4   
3              ARUNACHAL PRADESH           LOHIT   42.2  80.8  176.4  358.5   
4              ARUNACHAL PRADESH      EAST SIANG   33.3  79.5  105.9  216.5   
..                           ...             ...    ...   ...    ...    ...   
636                       KERALA          IDUKKI   13.4  22.1   43.6  150.4   
637                       KERALA        KASARGOD    2.3   1.0    8.4   46.9   
638                       KERALA  PATHANAMTHITTA   19.8  45.2   73.9  184.9   
639                       KERALA         WAYANAD    4.8   8.3   17.5   83.3   
640                  LAKSHADWEEP     LAKSHADWEEP   20.8  14.7   11.8   48.9   

       MAY    JUN     JUL    AUG    SEP    OCT    N

In [4]:
# Step 3: Find the district that gets the highest annual rainfall
df['Annual'] = df.iloc[:, 2:].sum(axis=1)
max_rainfall_district = df.loc[df['Annual'].idxmax()]

print("\nDistrict with the highest annual rainfall:")
print(max_rainfall_district)


District with the highest annual rainfall:
STATE_UT_NAME       MANIPUR
DISTRICT         TAMENGLONG
JAN                    48.5
FEB                   229.6
MAR                   224.5
APR                   431.5
MAY                   539.9
JUN                  1158.7
JUL                  1820.9
AUG                  1522.1
SEP                   726.3
OCT                   376.1
NOV                   144.0
DEC                     7.2
ANNUAL               7229.3
Jan-Feb               278.1
Mar-May              1195.9
Jun-Sep              5228.0
Oct-Dec               527.3
Annual              21687.9
Name: 55, dtype: object


In [5]:
# Step 4: Display the top 5 states that get the highest annual rainfall
top_states = df.groupby('STATE_UT_NAME')['Annual'].sum().sort_values(ascending=False).head(5)
print("\nTop 5 states with the highest annual rainfall:")
print(top_states)


Top 5 states with the highest annual rainfall:
STATE_UT_NAME
UTTAR PRADESH        203509.8
ASSAM                198803.1
MADHYA PRADESH       154846.5
ARUNACHAL PRADESH    140514.0
BIHAR                136863.3
Name: Annual, dtype: float64


In [6]:
# Step 5: Drop the specified columns
df.drop(columns=['Jan-Feb', 'Mar-May', 'Jun-Sep', 'Oct-Dec'], inplace=True)
print(df)

                   STATE_UT_NAME        DISTRICT    JAN   FEB    MAR    APR  \
0    ANDAMAN And NICOBAR ISLANDS         NICOBAR  107.3  57.9   65.2  117.0   
1    ANDAMAN And NICOBAR ISLANDS   SOUTH ANDAMAN   43.7  26.0   18.6   90.5   
2    ANDAMAN And NICOBAR ISLANDS   N & M ANDAMAN   32.7  15.9    8.6   53.4   
3              ARUNACHAL PRADESH           LOHIT   42.2  80.8  176.4  358.5   
4              ARUNACHAL PRADESH      EAST SIANG   33.3  79.5  105.9  216.5   
..                           ...             ...    ...   ...    ...    ...   
636                       KERALA          IDUKKI   13.4  22.1   43.6  150.4   
637                       KERALA        KASARGOD    2.3   1.0    8.4   46.9   
638                       KERALA  PATHANAMTHITTA   19.8  45.2   73.9  184.9   
639                       KERALA         WAYANAD    4.8   8.3   17.5   83.3   
640                  LAKSHADWEEP     LAKSHADWEEP   20.8  14.7   11.8   48.9   

       MAY    JUN     JUL    AUG    SEP    OCT    N

In [7]:
# Step 6: Display the state-wise mean rainfall for all the months using a pivot table
monthly_columns = df.columns[2:14]  # Assuming columns from 2 to 13 are monthly data
pivot_table = df.pivot_table(index='STATE_UT_NAME', values=monthly_columns, aggfunc='mean')
print("\nState-wise mean rainfall for all the months:")
print(pivot_table)


State-wise mean rainfall for all the months:
                                    APR         AUG         DEC        FEB  \
STATE_UT_NAME                                                                
ANDAMAN And NICOBAR ISLANDS   86.966667  385.300000  159.733333  33.266667   
ANDHRA PRADESH                19.873913  179.426087   15.565217   7.352174   
ARUNACHAL PRADESH            275.162500  378.600000   35.956250  93.293750   
ASSAM                        181.266667  377.370370   11.440741  31.714815   
BIHAR                         16.865789  289.481579    5.786842   9.278947   
CHANDIGARH                    14.800000  287.500000   23.400000  38.900000   
CHATISGARH                    13.116667  375.338889    5.811111  10.472222   
DADAR NAGAR HAVELI             0.000000  655.900000    0.000000   0.300000   
DAMAN AND DUI                  0.100000  394.600000    0.450000   0.500000   
DELHI                          8.900000  245.500000    8.600000  16.300000   
GOA               

In [8]:
# Step 7: Display the count of districts in each state
district_count = df['STATE_UT_NAME'].value_counts()
print("\nCount of districts in each state:")
print(district_count)


Count of districts in each state:
STATE_UT_NAME
UTTAR PRADESH                  71
MADHYA PRADESH                 50
BIHAR                          38
MAHARASHTRA                    35
RAJASTHAN                      33
TAMIL NADU                     32
KARNATAKA                      30
ORISSA                         30
ASSAM                          27
GUJARAT                        26
JHARKHAND                      24
ANDHRA PRADESH                 23
JAMMU AND KASHMIR              22
HARYANA                        21
PUNJAB                         20
WEST BENGAL                    19
CHATISGARH                     18
ARUNACHAL PRADESH              16
KERALA                         14
UTTARANCHAL                    13
HIMACHAL                       12
NAGALAND                       11
MIZORAM                         9
MANIPUR                         9
DELHI                           9
MEGHALAYA                       7
SIKKIM                          4
TRIPURA                         4

In [10]:
# Step 8: For each state, display the district that gets the highest rainfall in May
df['MAY'] = df['MAY'].astype(float)  # Ensure MAY column is float for comparison
highest_may_rainfall = df.loc[df.groupby('STATE_UT_NAME')['MAY'].idxmax()]
print("\nDistricts with the highest rainfall in MAY for each state:")
print(highest_may_rainfall[['STATE_UT_NAME', 'DISTRICT', 'MAY']])


Districts with the highest rainfall in MAY for each state:
                   STATE_UT_NAME       DISTRICT    MAY
1    ANDAMAN And NICOBAR ISLANDS  SOUTH ANDAMAN  374.4
544               ANDHRA PRADESH  VISAKHAPATNAM   96.6
10             ARUNACHAL PRADESH     PAPUM PARE  453.0
31                         ASSAM      KARIMGANJ  604.0
194                        BIHAR     KISHANGANJ  155.7
306                   CHANDIGARH     CHANDIGARH   30.1
519                   CHATISGARH         BASTAR   38.6
479           DADAR NAGAR HAVELI            DNH    7.4
480                DAMAN AND DUI          DAMAN    7.4
307                        DELHI      NEW DELHI   19.3
488                          GOA      NORTH GOA   94.3
458                      GUJARAT          DANGS   12.5
303                      HARYANA      PANCHKULA   27.9
341                     HIMACHAL  LAHUL & SPITI   91.7
349            JAMMU AND KASHMIR      BARAMULLA  111.4
154                    JHARKHAND          PAKUR   86.1
598  