<a href="https://colab.research.google.com/github/VardhanYadav/Cleanest-City-Prediction-Model/blob/main/2024_cleanest_city_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

Uploading the CSV file

In [None]:
# Colab-only upload step. The resulting `uploaded` object is indexed by file name
# later in the notebook to obtain the raw bytes of the CSV.
from google.colab import files
uploaded = files.upload()

Saving Cleanest_Cities_India.csv to Cleanest_Cities_India.csv


Reading the dataset

In [None]:
# Read the survey scores from the CSV uploaded in the previous cell.
data = pd.read_csv("Cleanest_Cities_India.csv")

# Target construction: extrapolate one more year from the 2023 score using the
# mean yearly change between the 2016 and 2023 columns (a span of 7 years).
latest_score = data['2023_Score_Max10000']
data['avg_increase'] = (latest_score - data['2016_Score']) / 7
data['2024_Score_Predicted'] = latest_score + data['avg_increase']

# NOTE(review): the target above is an exact linear function of two of the
# features selected below ('2016_Score' and '2023_Score_Max10000'), so a high
# R-squared mostly shows the model can recover that formula.
# NOTE(review): the column names suggest the yearly scores use different
# maximums -- confirm that subtracting them directly is intended.
feature_columns = [
    '2016_Score',
    '2017_Score',
    '2018_Score',
    '2019_Score_5000',
    '2020_Score_Max6000',
    '2022_Score_Max7500',
    '2023_Score_Max10000',
]
X = data[feature_columns]
y = data['2024_Score_Predicted']


Creating the training and test variables used by the algorithm

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Standardise the features. The scaler's statistics are learned from the
# training rows only and then applied unchanged to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Selecting the Algorithm to train the model

In [None]:
# Fit a gradient-boosted tree ensemble on the standardised training features.
# random_state is pinned so that re-running the notebook yields the same fitted
# model (and therefore the same predictions and R-squared).
model_gb = GradientBoostingRegressor(random_state=42)
model_gb.fit(X_train_scaled, y_train)

Testing the result

In [None]:
# Predict on the held-out rows and show at most the first 100 predictions.
y_pred_gb = model_gb.predict(X_test_scaled)
print(y_pred_gb[:100])

[ 7365.70999171  7291.7037759   7365.88846132  7365.70999171
  7365.94803356  7365.70999171  7365.70999171  7365.94803356
  7365.70999171  7365.70999171  7365.65041946  7365.88846132
  7365.70999171  7365.65041946  7365.70999171  7365.70999171
  7365.70999171  9509.91113392  7365.94803356  7365.70999171
  7365.65041946  7365.70999171  9799.37649447  7365.70999171
  7365.70999171  7365.65041946  7365.65041946  7365.65041946
  7365.70999171  7365.70999171  7365.70999171  7365.70999171
  7365.70999171  7365.94803356  7365.70999171  7365.70999171
  5462.26646991  7365.70999171  7365.70999171  7365.94803356
  7365.65041946  4594.19355844  7365.70999171  7365.65041946
 10376.2849166   7365.70999171  7365.70999171  7365.70999171
  7365.70999171  7365.70999171  7365.65041946  7365.70999171
  7365.70999171  7399.97697162  7365.70999171  7365.70999171
  7365.70999171  7365.70999171  7365.70999171  7389.46509373
  7365.65041946  7365.70999171  7365.70999171  7365.88846132
  7365.65041946  7365.70

In [None]:
# R-squared of the predictions against the held-out test targets.
r2 = r2_score(y_test, y_pred_gb)

In [None]:
# Report the held-out R-squared computed in the previous cell.
print(f"R-squared: {r2}")

R-squared: 0.996102753523973


Creating Interface using Gradio

In [None]:
pip install gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

Showcasing data in the form of graphs

In [None]:
!pip install plotly



Plotting each year's scores as a scatter plot, with the cities sorted by name in ascending order

In [None]:
import plotly.express as px

def plot_city_scores(frame, score_column):
    """Show an interactive scatter plot of one score column against city name.

    Args:
        frame: DataFrame containing a 'City Name' column and ``score_column``.
        score_column: Name of the column plotted on the y-axis.

    Returns:
        The Plotly figure, after it has been displayed.
    """
    fig = px.scatter(frame, x='City Name', y=score_column,
                     title=f"{score_column} by City Name")
    fig.show()
    return fig


# Rows ordered alphabetically by city name; every chart is drawn from this frame.
data1 = data.sort_values(by='City Name', ascending=True)

# One chart per survey year, newest first.
score_columns = [
    '2023_Score_Max10000',
    '2022_Score_Max7500',
    '2020_Score_Max6000',
    '2019_Score_5000',
    '2018_Score',
    '2017_Score',
    '2016_Score',
]

# The fig1..fig7 names are kept so any later cell that refers to them still works.
fig1, fig2, fig3, fig4, fig5, fig6, fig7 = [
    plot_city_scores(data1, score_column) for score_column in score_columns
]

Creating the interface to predict the 2024 Result

In [None]:
from flask import Flask, render_template, request
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
import io
import gradio as gr
import pandas as pd
from sklearn.model_selection import train_test_split
import threading
app = Flask(__name__)

# Load the dataset from the bytes captured by files.upload(), looked up by the
# uploaded file's name.
data = pd.read_csv(io.BytesIO(uploaded['Cleanest_Cities_India.csv']))
# Preview only the first rows; printing the whole frame floods the cell output.
print(data.head())

# Features definition
X = data[['2016_Score', '2017_Score', '2018_Score', '2019_Score_5000',
          '2020_Score_Max6000', '2022_Score_Max7500', '2023_Score_Max10000']]

# Target for the 2024 prediction: the 2023 score plus the mean yearly change
# between the 2016 and 2023 columns (a span of 7 years).
data['2024_Score_Predicted'] = data['2023_Score_Max10000'] + (
            data['2023_Score_Max10000'] - data['2016_Score']) / 7

# Define the target variable
y = data['2024_Score_Predicted']

# Train-test split (20% held out, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then reuse it for the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the model; random_state is pinned so re-running the cell reproduces the
# same fitted model and the same reported scores.
model = GradientBoostingRegressor(random_state=42)
model.fit(X_train_scaled, y_train)

# R-squared on the held-out test rows
y_pred_test = model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred_test)
print(f"Overall R-squared for the model: {r2}")

def get_city_data(city):
    """Look up the rows of ``data`` whose 'City Name' equals ``city``.

    Args:
        city: City name to match exactly against the 'City Name' column.

    Returns:
        A DataFrame of the matching rows with a fresh 0-based index, or
        ``None`` when no row matches.
    """
    matches = data.loc[data['City Name'] == city]
    return matches.reset_index(drop=True) if not matches.empty else None

def predict(city):
    """Predict the 2024 score for ``city`` and report the model's R-squared.

    Used as the callback of the Gradio interface.

    Args:
        city: City name exactly as it appears in the 'City Name' column.

    Returns:
        A ``(prediction, accuracy)`` pair of strings, each formatted to two
        decimals, or ``("City not found.", "N/A")`` when no row matches.
    """
    # get_city_data already returns None for unknown cities, so a separate
    # membership check beforehand is not needed.
    city_data = get_city_data(city)
    if city_data is None:
        return "City not found.", "N/A"

    feature_columns = ['2016_Score', '2017_Score', '2018_Score',
                       '2019_Score_5000', '2020_Score_Max6000',
                       '2022_Score_Max7500', '2023_Score_Max10000']

    # Keep the features as a one-row DataFrame: the scaler was fitted on a
    # DataFrame, and handing it a bare list triggers scikit-learn's
    # "X does not have valid feature names" warning. Taking only the first
    # match also keeps the input at exactly one row of seven features when a
    # city name appears more than once in the data.
    city_features = city_data[feature_columns].iloc[[0]]
    city_data_scaled = scaler.transform(city_features)

    # Predict the 2024 score
    prediction = model.predict(city_data_scaled)[0]

    # R-squared over the full dataset (training rows included)
    X_scaled = scaler.transform(X)
    y_pred = model.predict(X_scaled)
    accuracy = r2_score(y, y_pred)

    # Format the output as strings
    return f"{prediction:.2f}", f"{accuracy:.2f}"

def display_data():
    """Return the module-level ``data`` DataFrame unchanged."""
    return data

# Gradio front end
def create_gradio_interface():
    """Build the Gradio UI: a city dropdown wired to ``predict``.

    Returns:
        A ``gr.Interface`` whose single input is a dropdown of the unique
        values in ``data['City Name']`` and whose two text outputs receive
        the values returned by ``predict``.
    """
    city_choices = data['City Name'].unique().tolist()
    city_selector = gr.Dropdown(choices=city_choices, label="Select a City")
    result_boxes = [
        gr.Textbox(label="2024 Predicted Value"),
        gr.Textbox(label="Accuracy (R²)"),
    ]
    return gr.Interface(
        fn=predict,
        inputs=city_selector,
        outputs=result_boxes,
        title="City Cleanliness Prediction",
        description="Select a city to predict its cleanliness score for 2024.",
    )

# Build the interface once when the cell runs; its launch method is handed to
# a background thread at the bottom of this cell.
gradio_interface = create_gradio_interface()

# Create a route for the home page
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the home page, with a prediction when a city was submitted.

    On GET the template is rendered with ``prediction``, ``city_name`` and
    ``accuracy`` all set to ``None``. On POST the 'city' form field is looked
    up with ``get_city_data``; if it matches, the template receives the
    predicted 2024 score, the matched city name and the model's R-squared
    over the full dataset. An unknown city leaves all three values ``None``.

    Returns:
        The rendered 'index_gradio.html' template.
    """
    prediction = None
    city_name = None
    accuracy = None

    if request.method == 'POST':
        city = request.form['city']

        # Fetch city data for the input city
        city_data = get_city_data(city)

        if city_data is not None:
            feature_columns = ['2016_Score', '2017_Score', '2018_Score',
                               '2019_Score_5000', '2020_Score_Max6000',
                               '2022_Score_Max7500', '2023_Score_Max10000']

            # Use only the first match so the model input is always exactly one
            # row, even if the city name appears more than once in the data.
            first_match = city_data.iloc[[0]]
            city_name = first_match['City Name'].values[0]

            # Pass a DataFrame (not a bare list) so the scaler, which was fitted
            # on a DataFrame, does not emit the "valid feature names" warning.
            city_data_scaled = scaler.transform(first_match[feature_columns])
            prediction = model.predict(city_data_scaled)[0]

            # R-squared over the full dataset (training rows included)
            X_scaled = scaler.transform(X)
            y_pred = model.predict(X_scaled)
            accuracy = r2_score(y, y_pred)

    return render_template('index_gradio.html', prediction=prediction, city_name=city_name, accuracy=accuracy)

# Serve both front ends: Gradio on a background thread, Flask on the main thread.
if __name__ == "__main__":
    gradio_thread = threading.Thread(target=gradio_interface.launch, kwargs={'share': True})
    gradio_thread.start()
    # debug=True on its own also switches on the Werkzeug reloader, which works
    # by re-spawning the Python process ("Restarting with stat" in the cell
    # output). That is not meaningful inside a notebook kernel, so the reloader
    # is disabled explicitly while debug error pages stay on.
    app.run(debug=True, use_reloader=False)


         City Name  2023_Score_Max10000  2022_Score_Max7500  \
0           INDORE          9348.390000         7146.410000   
1            SURAT          9348.390000         6924.840000   
2      NAVI MUMBAI          9240.300000         6852.910000   
3    VISAKHAPATNAM          8879.300000         6701.180000   
4           BHOPAL          8855.200000         6608.410000   
..             ...                  ...                 ...   
554       Siddipet          6593.857674         3728.456823   
555        Sitapur          6593.857674         3728.456823   
556          Tifra          6593.857674         3728.456823   
557         Tirupa          6593.857674         3728.456823   
558        Udaipur          6593.857674         3728.456823   

     2020_Score_Max6000  2019_Score_5000   2018_Score   2017_Score  \
0           5647.560000      4659.000000  3707.000000  1808.000000   
1           5519.590000      3861.000000  3318.000000  1762.000000   
2           5467.890000      4129

 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f8fb3610d89853b555.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)



X does not have valid feature names, but StandardScaler was fitted with feature names

