name : vishnu roopesh .c
usn : 22btrcl169

**Clean and gain insights from the hospital data**

In [None]:
import os

import pandas as pd

# Load hospital data and metadata
hospital_data = pd.read_csv('/content/hospitals.csv')
metadata = pd.read_csv('/content/metadata.csv')

# Split every 'Acronyms' cell on commas, one resulting piece per column
acronyms_definitions = metadata['Acronyms'].str.split(',', expand=True)

# The first row supplies the acronyms; each remaining row is re-joined into a
# single definition string (padding cells become empty strings first)
acronyms = acronyms_definitions.iloc[0].tolist()
definitions = acronyms_definitions.iloc[1:].fillna('').apply(lambda row: ', '.join(row), axis=1).tolist()

# Rename hospital columns whose text before '(' equals an acronym, using the
# text before '(' in the paired definition as the new column name
for acronym, definition in zip(acronyms, definitions):
    name = definition.split('(')[0].strip()
    # Collect every matching column first so the frame is renamed once per
    # acronym instead of once per matching column.
    renames = {
        col: name
        for col in hospital_data.columns
        if acronym == col.split('(')[0].strip()
    }
    if renames:
        hospital_data.rename(columns=renames, inplace=True)

# Add missing header "State/UT".
# Assign a fresh column list instead of writing into `columns.values`: that
# writes into the Index's backing array, which can leave label lookups stale
# and fails when the array is read-only.
hospital_data.columns = ['State/UT', *hospital_data.columns[1:]]

# Save modified hospital data to a new CSV file, creating the target folder
# first because to_csv does not create missing directories
output_path = '/content/mod/hospitals_modified.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
hospital_data.to_csv(output_path, index=False)

# Print the modified hospital data
print(hospital_data.head())


                    State/UT   PHC  CHC  SDH  DH Hospitals HospitalBeds
0  Andaman & Nicobar Islands    27    4  NaN   3        34         1246
1             Andhra Pradesh  1417  198   31  20      1666        60799
2          Arunachal Pradesh   122   62  NaN  15       199         2320
3                      Assam  1007  166   14  33      1220        19115
4                      Bihar  2007   63   33  43      2146        17796


NAME : VISHNU ROOPESH.C
USN : 22BTRCL169


**Problem Statement 10: (Fix the header)**

In [None]:
import pandas as pd

# Abbreviated column label -> descriptive column label
fullname = {
    "PHC": "Number of Primary Health Centers",
    "CHC": "Community Health Centers",
    "SDH": "Sub-District/Divisional Hospitals",
    "DH": "District Hospitals",
    "Hospitals": "Hospitals",
    "HospitalBeds": "Hospital Beds"
}

# Load the previously modified hospital table and expand the abbreviations
hospital_df = pd.read_csv('/content/mod/hospitals_modified.csv')
hospital_df = hospital_df.rename(columns=fullname)

# Whatever the first column is called at this point, label it 'State/UT'
first_column = hospital_df.columns[0]
hospital_df = hospital_df.rename(columns={first_column: 'State/UT'})

# Write the result back over the file it was read from
hospital_df.to_csv('/content/mod/hospitals_modified.csv', index=False)

# Show the first few rows of the renamed table
print(hospital_df.head())


                    State/UT Number of Primary Health Centers  \
0  Andaman & Nicobar Islands                               27   
1             Andhra Pradesh                             1417   
2          Arunachal Pradesh                              122   
3                      Assam                             1007   
4                      Bihar                             2007   

  Community Health Centers Sub-District/Divisional Hospitals  \
0                        4                               NaN   
1                      198                                31   
2                       62                               NaN   
3                      166                                14   
4                       63                                33   

  District Hospitals Hospitals Hospital Beds  
0                  3        34          1246  
1                 20      1666         60799  
2                 15       199          2320  
3                 33      1220      

NAME : VISHNU ROOPESH.C
 USN : 22BTRCL169

# Problem Statement 11: (Create a function to alter the data to create uniformity)

In [None]:
import pandas as pd
import os

def fix_state_ut_names(data):
    """Normalise the spelling of the 'State/UT' column to one name per region.

    Surrounding whitespace is stripped from every string entry, then known
    variant spellings are replaced by their canonical name. Names that are
    not listed in the mapping (and missing values) pass through unchanged.

    Args:
        data: DataFrame containing a 'State/UT' column.

    Returns:
        The same DataFrame object; its 'State/UT' column is updated in place.

    Raises:
        KeyError: if `data` has no 'State/UT' column.
    """
    # Variant spelling -> canonical name. Identity entries are unnecessary
    # because unmapped names are kept as they are.
    state_ut_mapping = {
        'A&N Islands': 'Andaman and Nicobar Islands',
        'AN Islands': 'Andaman and Nicobar Islands',
        'Andaman & Nicobar Islands': 'Andaman and Nicobar Islands',
        'D&N Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Dadra & Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman & Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman and Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'NCT of Delhi': 'Delhi',
        'NCT OF DELHI': 'Delhi',
        'Jammu & Kashmir': 'Jammu and Kashmir',
        'Uttaranchal': 'Uttarakhand',
        'Pondicherry': 'Puducherry',
    }

    # Strip stray whitespace so names such as ' Uttaranchal ' still match;
    # non-string entries (e.g. NaN) are left untouched.
    cleaned = data['State/UT'].map(
        lambda value: value.strip() if isinstance(value, str) else value
    )

    # map() yields NaN for names absent from the mapping; fall back to the
    # cleaned original for those rows.
    data['State/UT'] = cleaned.map(state_ut_mapping).fillna(cleaned)
    return data

# Destination for the cleaned table
output_folder = 'Clean_Data'
output_file = os.path.join(output_folder, 'all_hospitals.csv')

# Load the renamed hospital table and normalise its State/UT spellings
hospital_data = pd.read_csv('/content/mod/hospitals_modified.csv')
hospital_data_fixed = fix_state_ut_names(hospital_data)

# Create the output folder if needed, then write the cleaned table
os.makedirs(output_folder, exist_ok=True)
hospital_data_fixed.to_csv(output_file, index=False)

# Preview the first rows of the cleaned table
print(hospital_data_fixed.head())

                      State/UT Number of Primary Health Centers  \
0  Andaman and Nicobar Islands                               27   
1               Andhra Pradesh                             1417   
2            Arunachal Pradesh                              122   
3                        Assam                             1007   
4                        Bihar                             2007   

  Community Health Centers Sub-District/Divisional Hospitals  \
0                        4                               NaN   
1                      198                                31   
2                       62                               NaN   
3                      166                                14   
4                       63                                33   

  District Hospitals Hospitals Hospital Beds  
0                  3        34          1246  
1                 20      1666         60799  
2                 15       199          2320  
3                 33    

**PROBLEM 12**

DAYANANDASHABARI S - 22BTRCL045

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Step 1: Calculate hospital beds per 10,000 people for each state/UT
def calculate_beds_per_10k(data, *, beds_col='Total Beds', population_col='Population'):
    """Add a 'Beds per 10k' column: beds divided by population, times 10,000.

    Args:
        data: DataFrame holding one row per State/UT.
        beds_col: Name of the column with the bed counts.
        population_col: Name of the column with the population figures.

    Returns:
        The same DataFrame object, with the 'Beds per 10k' column added
        (or overwritten) in place.

    Raises:
        KeyError: if `beds_col` or `population_col` is not a column of `data`.
    """
    # Fail with a message naming what is missing and what is available,
    # rather than a bare KeyError from the column lookup.
    missing = [col for col in (beds_col, population_col) if col not in data.columns]
    if missing:
        raise KeyError(
            f"Missing required column(s) {missing}; available columns: {list(data.columns)}"
        )

    data['Beds per 10k'] = (data[beds_col] / data[population_col]) * 10000
    return data

# Load the government hospital figures and derive beds per 10,000 people.
# NOTE(review): the cleaning cell for this same CSV writes Rural_/Urban_
# hospital and bed columns; confirm the file really has 'Total Beds' and
# 'Population' columns before relying on this cell.
hospital_data = calculate_beds_per_10k(pd.read_csv('/content/government_hospitals.csv'))

# Step 2: bar chart of the ratio per State/UT, with the mean as a dashed line
states = hospital_data['State/UT']
beds_ratio = hospital_data['Beds per 10k']
national_average = beds_ratio.mean()

plt.figure(figsize=(12, 8))
plt.bar(states, beds_ratio, color='skyblue')
plt.axhline(national_average, color='red', linestyle='--', label='National Average')
plt.title('Hospital Beds per 10,000 People by State/UT')
plt.xlabel('State/UT')
plt.ylabel('Hospital Beds per 10,000 People')
plt.xticks(rotation=90)
plt.legend()
plt.tight_layout()
plt.show()

# Step 3: the three States/UTs with the lowest beds-per-10k ratio
least_beds_states = hospital_data[['State/UT', 'Beds per 10k']].nsmallest(3, 'Beds per 10k')
print("States/UTs with the least amount of beds for their population:")
print(least_beds_states)

# Problem Statement 13
## NAME : Suraj J
USN : 22BTRCL153

In [None]:
import pandas as pd

# The cleaned table overwrites the raw file it is read from
output_filename = "/content/government_hospitals.csv"

# Column names of the cleaned table, in output order
cleaned_columns = [
    "State/UT",
    "Rural_Government_Hospitals",
    "Rural_Government_Beds",
    "Urban_Government_Hospitals",
    "Urban_Government_Beds",
    "Last_Updated",
]

# Because input and output are the same file, re-running the cleaning on an
# already cleaned file would skip its header plus one data row and corrupt it.
# Peek at the header only (nrows=0) and skip the cleaning when it already
# matches the cleaned layout.
current_header = list(pd.read_csv(output_filename, nrows=0).columns)

if current_header == cleaned_columns:
    new_df = pd.read_csv(output_filename)
else:
    # Raw layout: skip the first two lines so the third line becomes the header
    df = pd.read_csv(output_filename, skiprows=[0, 1])

    # Pick the columns by position; missing counts become 0 so the four
    # count columns can be stored as integers
    new_df = pd.DataFrame({
        "State/UT": df.iloc[:, 0],
        "Rural_Government_Hospitals": df.iloc[:, 1].fillna(0).astype(int),
        "Rural_Government_Beds": df.iloc[:, 2].fillna(0).astype(int),
        "Urban_Government_Hospitals": df.iloc[:, 3].fillna(0).astype(int),
        "Urban_Government_Beds": df.iloc[:, 4].fillna(0).astype(int),
        "Last_Updated": df.iloc[:, 5]
    })

    # Save the new DataFrame to a CSV file
    new_df.to_csv(output_filename, index=False)

print(f"Cleaned data saved to: {output_filename}")

Cleaned data saved to: /content/government_hospitals.csv


NAME : GUNJ
USN :  22BTRCL058
# **Problem Statement 14: (Data update and code reuse)**

In [20]:
import pandas as pd

# Load the raw hospital dataset
df = pd.read_csv('/content/hospitals.csv')

# Bare expression as the last line of the cell: the notebook displays the frame
df

Unnamed: 0.1,Unnamed: 0,PHC,CHC,SDH,DH,Hospitals,HospitalBeds
0,Andaman & Nicobar Islands,27,4,,3,34,1246
1,Andhra Pradesh,1417,198,31.0,20,1666,60799
2,Arunachal Pradesh,122,62,,15,199,2320
3,Assam,1007,166,14.0,33,1220,19115
4,Bihar,2007,63,33.0,43,2146,17796
5,Chandigarh,40,2,1.0,4,47,3756
6,Chhattisgarh,813,166,12.0,32,1023,14354
7,Dadra & Nagar Haveli,9,2,1.0,1,13,568
8,Daman & Diu,4,2,,2,8,298
9,Delhi,534,25,9.0,47,615,20572
