In [1]:
#!pip install numpy==1.21.5
#!pip install pandas==1.3.3



import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder



df = pd.read_csv('Norway_Air_Quality_Dataset.csv')

# Define air quality labeling function
def label_air_quality(value):
    if value < 50:
        return "Good"
    elif 50 <= value < 100:
        return "Medium"
    else:
        return "Bad"

# Apply labeling
df["Air_Quality"] = df["Value"].apply(label_air_quality)



# 2. Encode categorical features (State and Pollutant) and label (Air Quality)
df_encoded = df.copy()
label_encoder_state = LabelEncoder()
label_encoder_pollutant = LabelEncoder()
label_encoder_air_quality = LabelEncoder()

df_encoded["State"] = label_encoder_state.fit_transform(df["State"])
df_encoded["Pollutant"] = label_encoder_pollutant.fit_transform(df["Pollutant"])
df_encoded["Air_Quality_Label"] = label_encoder_air_quality.fit_transform(df["Air_Quality"])

# 3. Split data for classification
X = df_encoded[["State", "Pollutant", "Value"]]
y_class = df_encoded["Air_Quality_Label"]
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X, y_class, test_size=0.2, random_state=42)

# Classification models
classifiers = {
    "Random Forest Classifier": RandomForestClassifier(random_state=42),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42),
    "Support Vector Classifier": SVC(random_state=42)
}

# Train and test classifiers
classification_results = {}
for name, model in classifiers.items():
    model.fit(X_train_class, y_train_class)
    predictions = model.predict(X_test_class)
    accuracy = accuracy_score(y_test_class, predictions)
    classification_results[name] = accuracy

# Display classification results
print("\nClassification Results (Accuracy):")
for model, accuracy in classification_results.items():
    print(f"{model}: {accuracy:.2f}")

# 4. Split data for regression
y_reg = df_encoded["Value"]
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# Regression models
regressors = {
    "Random Forest Regressor": RandomForestRegressor(random_state=42),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "Linear Regression": LinearRegression()
}

# Train and test regressors
regression_results = {}
for name, model in regressors.items():
    model.fit(X_train_reg, y_train_reg)
    predictions = model.predict(X_test_reg)
    mse = mean_squared_error(y_test_reg, predictions)
    regression_results[name] = mse

# Display regression results
print("\nRegression Results (Mean Squared Error):")
for model, mse in regression_results.items():
    print(f"{model}: {mse:.2f}")



Classification Results (Accuracy):
Random Forest Classifier: 1.00
Decision Tree Classifier: 1.00
Support Vector Classifier: 1.00

Regression Results (Mean Squared Error):
Random Forest Regressor: 0.92
Decision Tree Regressor: 1.57
Linear Regression: 0.00


# Naive Bayes

In [2]:
from sklearn.naive_bayes import GaussianNB  # Import Naive Bayes

# Add Naive Bayes to the list of classifiers
classifiers = {
    "Random Forest Classifier": RandomForestClassifier(random_state=42),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42),
    "Support Vector Classifier": SVC(random_state=42),
    "Naive Bayes Classifier": GaussianNB()  # Adding Naive Bayes
}

# Train and test classifiers
classification_results = {}
for name, model in classifiers.items():
    model.fit(X_train_class, y_train_class)
    predictions = model.predict(X_test_class)
    accuracy = accuracy_score(y_test_class, predictions)
    classification_results[name] = accuracy

# Display classification results
print("\nClassification Results (Accuracy):")
for model, accuracy in classification_results.items():
    print(f"{model}: {accuracy:.2f}")



Classification Results (Accuracy):
Random Forest Classifier: 1.00
Decision Tree Classifier: 1.00
Support Vector Classifier: 1.00
Naive Bayes Classifier: 1.00


# Show in Map

In [3]:
#!pip install folium

import folium


# Map Norway states to approximate coordinates for plotting (replace with actual coordinates if available)
state_coordinates = {
    "Oslo": (59.9139, 10.7522),
    "Bergen": (60.3913, 5.3221),
    "Trondheim": (63.4305, 10.3951),
    "Stavanger": (58.9699, 5.7331),
    "Tromsø": (69.6492, 18.9553),
    "Kristiansand": (58.1467, 7.9956),
    "Fredrikstad": (59.2181, 10.9394),
    "Drammen": (59.7439, 10.2045),
    "Skien": (59.2096, 9.6060),
    "Ålesund": (62.4722, 6.3873)
}

# Create a map centered around Norway
m = folium.Map(location=[60.472, 8.4689], zoom_start=5)

# Add markers to the map for each state
for state, coords in state_coordinates.items():
    air_quality = df[df["State"] == state]["Air_Quality"].mode()[0] 
    folium.Marker(
        location=coords,
        popup=f"{state}: Air Quality - {air_quality}",
        icon=folium.Icon(color="green" if air_quality == "Good" else "orange" if air_quality == "Medium" else "red")
    ).add_to(m)

# Save the map to an HTML file
m.save("norway_air_quality_map.html")

# To display the map in a Jupyter environment 
m