In [1]:
import pandas as pd 

In [2]:
# city_day.csv: judging by its columns, dated per-city pollutant readings
# together with an AQI value and AQI bucket. Preview two random rows.
df1 = pd.read_csv('city_day.csv')
df1.sample(n=2)

Unnamed: 0,City,Date,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,Toluene,Xylene,AQI,AQI_Bucket
4046,Bengaluru,2015-03-22,47.38,,2.84,22.39,15.33,23.19,2.76,2.05,71.85,14.97,138.2,,120.0,Moderate
2437,Amaravati,2019-03-18,22.52,69.14,7.47,13.59,13.38,7.71,0.56,10.99,21.12,1.17,4.07,0.33,101.0,Moderate


In [3]:
# accident.csv: judging by its columns, one row per road accident with
# location, timing, severity, vehicle and driver details. Preview two random rows.
df2 = pd.read_csv('accident.csv')
df2.sample(n=2)

Unnamed: 0,State Name,City Name,Year,Month,Day of Week,Time of Day,Accident Severity,Number of Vehicles Involved,Vehicle Type Involved,Number of Casualties,...,Road Type,Road Condition,Lighting Conditions,Traffic Control Presence,Speed Limit (km/h),Driver Age,Driver Gender,Driver License Status,Alcohol Involvement,Accident Location Details
84,Manipur,Unknown,2020,May,Sunday,8:53,Minor,5,Two-Wheeler,10,...,Urban Road,Wet,Daylight,Signals,33,24,Female,Valid,Yes,Bridge
2678,Odisha,Unknown,2021,July,Saturday,0:49,Fatal,4,Bus,2,...,Urban Road,Under Construction,Dark,,104,59,Male,Expired,Yes,Intersection


In [4]:
# crime.csv: judging by its columns, one row per crime report with city,
# crime description, victim age/gender and case status. Preview two random rows.
df3 = pd.read_csv('crime.csv')
df3.sample(n=2)

Unnamed: 0,Report Number,Date Reported,Date of Occurrence,Time of Occurrence,City,Crime Code,Crime Description,Victim Age,Victim Gender,Weapon Used,Crime Domain,Police Deployed,Case Closed,Date Case Closed
5409,5410,13-08-2020 23:00,08-13-2020 09:00,14-08-2020 06:22,Kolkata,420,DOMESTIC VIOLENCE,21,F,Knife,Violent Crime,15,Yes,09-11-2020 23:00
4262,4263,27-06-2020 10:00,06-26-2020 14:00,26-06-2020 19:41,Delhi,424,KIDNAPPING,24,F,Other,Other Crime,19,No,


In [5]:
# overall.csv: city-level summary combining age-group and gender crime
# columns, an AQI label and an accident figure. Preview five random rows.
df4 = pd.read_csv('overall.csv')
df4.sample(n=5)

Unnamed: 0,City,Age_0_25,Age_25_50,Age_50_above,Male_Crime,Female_Crime,AQI,Total_Accidents
25,Kanpur,258,375,479,501,611,105 (Moderate),28.0
24,Kalyan,76,120,159,161,194,140 (Moderate),31.9
37,Patna,153,247,295,300,395,215 (Poor),31.9
50,Vadodara,178,292,85,428,127,118 (Moderate),32.0
33,Mysore,124,204,59,299,88,83 (Satisfactory),37.0


In [6]:
# Show the summary table's dimensions as (rows, columns).
df4.shape

(55, 8)

In [7]:
import pandas as pd

# Start from the raw summary file so re-running this cell always rebuilds
# `df` from scratch.
df = pd.read_csv('overall.csv')

# The "AQI" column holds text such as "118 (Moderate)": pull the leading
# digits and the parenthesised label into their own columns.
aqi_text = df['AQI']
df['AQI_Numeric'] = aqi_text.str.extract(r'(\d+)').astype(float)
df['AQI_Category'] = aqi_text.str.extract(r'\((.*)\)')

# Risk weight for each AQI label (0.0 for the best label, 1.0 for the worst).
# Labels missing from this table map to NaN.
aqi_risk_map = dict([
    ('Good', 0.0),
    ('Satisfactory', 0.2),
    ('Moderate', 0.5),
    ('Poor', 0.8),
    ('Very Poor', 1.0),
    ('Severe', 1.0),
])
df['AQI_Risk'] = df['AQI_Category'].map(aqi_risk_map)

# Normalisation denominators: the largest value seen across all cities.
# Crime shares a single maximum taken over both gender columns.
max_crime = max(df['Male_Crime'].max(), df['Female_Crime'].max())
max_age_0_25, max_age_25_50, max_age_50_above = (
    df[column].max() for column in ('Age_0_25', 'Age_25_50', 'Age_50_above')
)
max_accidents = df['Total_Accidents'].max()

print(f"Max Crime: {max_crime}")
print(f"Max Age 0-25: {max_age_0_25}")
print(f"Max Age 25-50: {max_age_25_50}")
print(f"Max Age 50+: {max_age_50_above}")
print(f"Max Accidents: {max_accidents}")

# The raw text column is redundant now that it has been split.
df = df.drop(columns=['AQI'])
df.head()

Max Crime: 2956
Max Age 0-25: 1158
Max Age 25-50: 1976
Max Age 50+: 2266
Max Accidents: 39.0


Unnamed: 0,City,Age_0_25,Age_25_50,Age_50_above,Male_Crime,Female_Crime,Total_Accidents,AQI_Numeric,AQI_Category,AQI_Risk
0,Agra,184,270,310,307,457,31.9,137.0,Moderate,0.5
1,Ahmedabad,388,649,780,784,1033,30.0,118.0,Moderate,0.5
2,Aizawl,38,62,18,92,26,31.9,37.0,Good,0.0
3,Amaravati,72,118,34,174,50,31.9,220.0,Poor,0.8
4,Amritsar,198,326,94,478,140,31.9,55.0,Satisfactory,0.2


In [8]:
def calculate_safety_score(city, age, gender):
    """
    Calculate a safety score for a person based on city, age, and gender.

    Four normalised risks are blended with fixed weights (crime 40%,
    age-group crime 25%, air quality 20%, accidents 15%) and the score is
    ``(1 - total_risk) * 100``. A risk breakdown and a recommendation are
    printed as a side effect.

    Reads the module-level ``df`` and the ``max_crime``, ``max_age_0_25``,
    ``max_age_25_50``, ``max_age_50_above`` and ``max_accidents`` values.

    Parameters:
    - city: City name (string). An exact match against ``df['City']`` is
      tried first; otherwise the comparison ignores letter case and
      surrounding whitespace.
    - age: Age of the person (non-negative number). Below 25 uses
      ``Age_0_25``, 25 to 49 uses ``Age_25_50``, 50 and above uses
      ``Age_50_above``.
    - gender: 'M' for Male, 'F' for Female (case-insensitive).

    Returns:
    - Safety score percentage (float, normally between 0 and 100) on success.
    - An explanatory message (str) when the city is unknown, the gender or
      age is invalid, or the city's data is incomplete.
    """
    # Weights of the four risk components; they sum to 1.0.
    crime_weight, age_weight, aqi_weight, accident_weight = 0.40, 0.25, 0.20, 0.15

    # Find city data: exact match first so existing callers behave as before,
    # then a forgiving match that ignores case and stray whitespace.
    city_data = df[df['City'] == city]
    if city_data.empty:
        wanted = str(city).strip().lower()
        normalised_names = df['City'].astype(str).str.strip().str.lower()
        city_data = df[normalised_names == wanted]

    if city_data.empty:
        return f"City '{city}' not found in dataset"

    city_data = city_data.iloc[0]
    city_name = city_data['City']

    # 1. Crime Risk based on gender. str() keeps non-string input (e.g. None)
    # on the "Invalid gender" path instead of raising AttributeError.
    gender_code = str(gender).strip().upper()
    crime_column = {'M': 'Male_Crime', 'F': 'Female_Crime'}.get(gender_code)
    if crime_column is None:
        return "Invalid gender. Use 'M' or 'F'"

    # A negative age would otherwise be scored silently as the 0-25 group.
    if age < 0:
        return "Invalid age. Age must be zero or greater"

    person_crime = city_data[crime_column]
    crime_risk = person_crime / max_crime

    # 2. Age Risk based on age group
    if age < 25:
        age_group_crime = city_data['Age_0_25']
        max_age_group = max_age_0_25
    elif age < 50:
        age_group_crime = city_data['Age_25_50']
        max_age_group = max_age_25_50
    else:
        age_group_crime = city_data['Age_50_above']
        max_age_group = max_age_50_above

    age_risk = age_group_crime / max_age_group

    # 3. AQI Risk
    aqi_risk = city_data['AQI_Risk']

    # 4. Accident Risk
    accident_risk = city_data['Total_Accidents'] / max_accidents

    # Calculate Total Risk with weights
    total_risk = (
        (crime_weight * crime_risk)
        + (age_weight * age_risk)
        + (aqi_weight * aqi_risk)
        + (accident_weight * accident_risk)
    )

    # A missing component (e.g. an AQI label with no risk mapping) makes the
    # total NaN. Every comparison against NaN is False, so without this guard
    # the city would be reported as "HIGH RISK" on the basis of no data.
    if pd.isna(total_risk):
        return f"Incomplete data for '{city_name}': cannot compute a safety score"

    # Calculate Safety Score
    safety_score = (1 - total_risk) * 100

    # Display breakdown
    print(f"\n{'='*60}")
    print(f"Safety Analysis for {city_name}")
    print(f"{'='*60}")
    print(f"Person: Age {age}, Gender {gender_code}")
    print(f"\nRisk Breakdown:")
    print(f"  Crime Risk (40%):    {crime_risk:.4f} ({person_crime} crimes)")
    print(f"  Age Risk (25%):      {age_risk:.4f} ({age_group_crime} crimes in age group)")
    print(f"  AQI Risk (20%):      {aqi_risk:.4f} ({city_data['AQI_Category']})")
    print(f"  Accident Risk (15%): {accident_risk:.4f} ({city_data['Total_Accidents']} accidents)")
    print(f"\nTotal Risk Score:     {total_risk:.4f}")
    print(f"Safety Score:         {safety_score:.2f}%")
    print(f"{'='*60}")

    # Recommendation
    if safety_score >= 70:
        recommendation = "✅ SAFE - Good choice to live here!"
    elif safety_score >= 50:
        recommendation = "⚠️ MODERATE - Consider safety precautions"
    else:
        recommendation = "❌ HIGH RISK - Not recommended"

    print(f"\nRecommendation: {recommendation}")

    return safety_score

# Example: score Mumbai for a 30-year-old male; the returned score is the cell output.
calculate_safety_score(city='Mumbai', age=30, gender='M')


Safety Analysis for Mumbai
Person: Age 30, Gender M

Risk Breakdown:
  Crime Risk (40%):    0.6495 (1920 crimes)
  Age Risk (25%):      0.8188 (1618 crimes in age group)
  AQI Risk (20%):      0.5000 (Moderate)
  Accident Risk (15%): 0.7179 (28.0 accidents)

Total Risk Score:     0.6722
Safety Score:         32.78%

Recommendation: ❌ HIGH RISK - Not recommended


np.float64(32.77906597711099)

In [9]:
import pickle

# Bundle the processed city table with the normalisation maxima that the
# scoring function divides by, so both are stored in a single file.
model_data = dict(
    df=df,
    max_crime=max_crime,
    max_age_0_25=max_age_0_25,
    max_age_25_50=max_age_25_50,
    max_age_50_above=max_age_50_above,
    max_accidents=max_accidents,
)

# Serialise the bundle; the context manager closes the file handle afterwards.
with open('city_safety_model.pkl', 'wb') as model_file:
    pickle.dump(model_data, model_file)

print("✅ Model data saved to 'city_safety_model.pkl'")

✅ Model data saved to 'city_safety_model.pkl'
