<a href="https://colab.research.google.com/github/ankit7719/predictive_lab/blob/main/PA_project_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install tensorflow scikit-learn




In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('/content/Food Waste data and research - by country.csv')


# Encode 'Confidence in estimate' as integer class ids (classification target).
# The fitted encoder is kept so predicted ids can be mapped back to their labels.
label_encoder = LabelEncoder()
data['Confidence_encoded'] = label_encoder.fit_transform(data['Confidence in estimate'])

# One-hot encode 'Region' (as it's categorical).
# dtype=float keeps the dummy columns numeric: recent pandas versions emit bool
# dummies by default, and a frame mixing bool and float columns converts to an
# object-dtype NumPy array, which TensorFlow cannot turn into a tensor.
data = pd.get_dummies(data, columns=['Region'], drop_first=True, dtype=float)

# Scale numeric columns
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# splits made further down, so held-out rows influence the scaling statistics.
# NOTE(review): 'M49 code' looks like a country identifier rather than a
# measurement -- confirm it is meant to be used as a scaled numeric feature.
scaler = StandardScaler()
numeric_cols = ['Household estimate (kg/capita/year)', 'Household estimate (tonnes/year)',
                'Retail estimate (kg/capita/year)', 'Retail estimate (tonnes/year)',
                'Food service estimate (kg/capita/year)', 'Food service estimate (tonnes/year)', 'M49 code']

data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

# Function to select features for a specific country
def get_country_data(country_name):
    """Return the model feature rows for a single country.

    The name is first matched exactly against the 'Country' column. If that
    finds nothing, a second lookup ignores surrounding whitespace and letter
    case, so names typed by hand (e.g. " japan ") still resolve.

    Args:
        country_name: Country name as it appears in the 'Country' column.

    Returns:
        DataFrame holding the rows of the module-level `data` frame for that
        country, without the identifier, target and source columns.

    Raises:
        ValueError: If no row matches the requested country.
    """
    country_data = data[data['Country'] == country_name]

    if country_data.empty and isinstance(country_name, str):
        # Exact match failed: retry with a whitespace/case-insensitive comparison.
        wanted = country_name.strip().casefold()
        normalized = data['Country'].astype(str).str.strip().str.casefold()
        country_data = data[normalized == wanted]

    if country_data.empty:
        raise ValueError(f"Country '{country_name}' not found in the dataset.")

    # Keep only the model inputs: drop the identifier, both targets (raw and
    # encoded) and the free-text source column.
    X_country = country_data.drop(['Country', 'combined figures (kg/capita/year)',
                                   'Confidence in estimate', 'Source', 'Confidence_encoded'], axis=1)
    return X_country

# For classification (confidence estimate), return label encoder too
def get_country_classification_data(country_name):
    """Bundle a country's feature rows with the fitted label encoder.

    Args:
        country_name: Country to look up via `get_country_data`.

    Returns:
        Tuple `(features, encoder)`: the feature rows for the country and the
        module-level `label_encoder`, used to decode predicted class ids.
    """
    return get_country_data(country_name), label_encoder


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Select the regression features and target once, so the network's input width
# is derived from the very frame it is trained on instead of repeating the drop.
X_reg = data.drop(['Country', 'combined figures (kg/capita/year)', 'Confidence in estimate', 'Source', 'Confidence_encoded'], axis=1)
y_reg = data['combined figures (kg/capita/year)']

# Define the regression model
# An explicit Input replaces `input_shape=` on the first Dense layer, which
# makes Keras emit a UserWarning when the Sequential model is built.
reg_model = Sequential([
    tf.keras.Input(shape=(X_reg.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer with a single neuron for regression
])

# Compile the model
reg_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Split data for regression
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

# Train the model (a further 20% of the training rows is held out for validation)
reg_model.fit(X_reg_train, y_reg_train, epochs=100, validation_split=0.2, batch_size=8)

# Function to make a regression prediction for a specific country
def predict_food_waste_for_country(country_name):
    """Print the regression model's food-waste estimate for one country.

    Args:
        country_name: Country to look up via `get_country_data`.
    """
    features = get_country_data(country_name)
    # predict() returns one row per input row; report the first row's output.
    predicted_value = reg_model.predict(features)[0][0]
    print(f"Predicted food waste for {country_name}: {predicted_value:.2f} kg/capita/year")

# Example usage (any country present in the dataset can be passed):
predict_food_waste_for_country("Albania")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 16749.7539 - mae: 127.5034 - val_loss: 17181.1094 - val_mae: 127.9796
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16517.4512 - mae: 126.7327 - val_loss: 16979.2188 - val_mae: 127.2515
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16068.4277 - mae: 125.4921 - val_loss: 16711.7676 - val_mae: 126.2581
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16391.8496 - mae: 125.9525 - val_loss: 16335.0430 - val_mae: 124.8265
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15446.2129 - mae: 122.5411 - val_loss: 15816.9180 - val_mae: 122.8046
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 14694.3262 - mae: 119.3787 - val_loss: 15087.5068 - val_mae: 119.8955
Epoch 7/100
[1m17/17



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Predicted food waste for Albania: 128.71 kg/capita/year


In [None]:
# Split data for classification
X_clf = X_reg.copy()  # Same features as in regression
y_clf = data['Confidence_encoded']

# Define the classification model
# The input width comes from X_clf itself, and an explicit Input replaces
# `input_shape=` on the first Dense layer, which makes Keras emit a UserWarning.
clf_model = Sequential([
    tf.keras.Input(shape=(X_clf.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')  # One probability per confidence class
])

# Compile the model; the sparse loss matches the integer-encoded labels
clf_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

X_clf_train, X_clf_test, y_clf_train, y_clf_test = train_test_split(X_clf, y_clf, test_size=0.2, random_state=42)

# Train the model (a further 20% of the training rows is held out for validation)
clf_model.fit(X_clf_train, y_clf_train, epochs=100, validation_split=0.2, batch_size=8)

# Function to make a classification prediction for a specific country
def predict_confidence_for_country(country_name):
    """Print the classifier's predicted confidence label for one country.

    Args:
        country_name: Country to look up via `get_country_classification_data`.
    """
    features, encoder = get_country_classification_data(country_name)
    class_probabilities = clf_model.predict(features)
    # Most probable class of the first row, decoded back to its text label.
    top_class = class_probabilities.argmax(axis=1)[0]
    decoded = encoder.inverse_transform([top_class])
    print(f"Predicted confidence estimate for {country_name}: {decoded[0]}")

# Example usage (any country present in the dataset can be passed):
predict_confidence_for_country("Albania")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.3569 - loss: 1.3662 - val_accuracy: 0.6286 - val_loss: 1.2310
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6665 - loss: 1.1869 - val_accuracy: 0.7143 - val_loss: 1.1245
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7228 - loss: 1.0193 - val_accuracy: 0.6857 - val_loss: 1.0565
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7169 - loss: 0.9325 - val_accuracy: 0.6857 - val_loss: 1.0009
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6599 - loss: 0.8684 - val_accuracy: 0.7143 - val_loss: 0.9648
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7775 - loss: 0.6772 - val_accuracy: 0.7429 - val_loss: 0.9296
Epoch 7/100
[1m17/17[0m [32m━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Predicted confidence estimate for Albania: Very Low Confidence


In [None]:
# Score the regression model on the held-out test split; evaluate() yields the
# loss followed by the metric the model was compiled with (MAE).
reg_loss, reg_mae = reg_model.evaluate(x=X_reg_test, y=y_reg_test)
print(f"Regression Test Loss: {reg_loss}, Test MAE: {reg_mae}")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 112.1204 - mae: 6.0337
Regression Test Loss: 114.4000015258789, Test MAE: 6.109315872192383


In [None]:
# Score the classifier on the held-out test split; evaluate() yields the loss
# followed by the metric the model was compiled with (accuracy).
clf_loss, clf_accuracy = clf_model.evaluate(x=X_clf_test, y=y_clf_test)
print(f"Classification Test Loss: {clf_loss}, Test Accuracy: {clf_accuracy}")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9172 - loss: 0.6238
Classification Test Loss: 0.6920087337493896, Test Accuracy: 0.9069767594337463


In [None]:
# Print the regression prediction for each country of interest, in order
for country in ("India", "Pakistan"):
    predict_food_waste_for_country(country)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Predicted food waste for India: 96.28 kg/capita/year
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Predicted food waste for Pakistan: 111.78 kg/capita/year


In [None]:
# Predict confidence estimate for a specific country

# Ask for the country to analyse; strip surrounding whitespace so a stray space
# or newline in the typed name does not defeat the exact country lookup.
x = input('Enter Country according to dataset::').strip()
# Function to provide solutions based on predicted confidence level
def suggest_solutions(confidence_level):
    """Return the recommendation text for a confidence label.

    Args:
        confidence_level: One of "Very Low Confidence", "Low Confidence",
            "Medium Confidence" or "High Confidence".

    Returns:
        A multi-line string: a header naming the level followed by four
        numbered suggestions, every line indented by eight spaces. Any other
        value yields a one-line "not recognized" message.
    """
    # Suggestions shared by more than one confidence level.
    improve_collection = ("Improve Data Collection: Increase sampling sizes and use more "
                          "accurate methods of data gathering.")
    local_authorities = ("Collaborate with Local Authorities: Work with local governments "
                         "to ensure accurate reporting from various sectors.")
    use_technology = ("Use Technology: Implement data-collection technologies like IoT "
                      "sensors or automated systems to track waste in real-time.")
    partner_ngos = ("Partner with NGOs: Partner with organizations that specialize in food "
                    "waste reduction to enhance data quality.")

    playbook = (
        ("Very Low Confidence", (
            improve_collection,
            local_authorities,
            use_technology,
            "Public Awareness: Educate consumers and businesses on food waste, "
            "encouraging better reporting practices.",
        )),
        ("Low Confidence", (
            improve_collection,
            local_authorities,
            use_technology,
            partner_ngos,
        )),
        ("Medium Confidence", (
            "Review Current Data Collection Processes: Ensure that current methods are "
            "comprehensive and consider seasonal or regional variations.",
            "Focus on Outliers: Look at the data to identify any unusual trends or "
            "outliers that might reduce confidence in the estimate.",
            "Training: Provide training to businesses and organizations to improve the "
            "accuracy of their waste reporting.",
            partner_ngos,
        )),
        ("High Confidence", (
            "Continue Best Practices: Maintain existing data collection practices that "
            "have proven effective.",
            "Expand to Other Areas: With high confidence in estimates, consider expanding "
            "the data collection scope to more regions or sectors.",
            "Improve Waste Reduction Efforts: Focus on actual waste reduction strategies "
            "as the data reliability is high (e.g., redistribution of surplus food, "
            "improving supply chain efficiency).",
            "Support Policy Development: Use the high-confidence data to develop "
            "government policies aimed at waste reduction and sustainability.",
        )),
    )

    pad = " " * 8
    for level, tips in playbook:
        if confidence_level == level:
            lines = [f"Suggested Solutions for {level}:"]
            lines.extend(f"{rank}. {tip}" for rank, tip in enumerate(tips, start=1))
            # Layout: leading newline, one padded line per entry, padded tail.
            return "\n" + "".join(f"{pad}{line}\n" for line in lines) + pad

    return "Confidence level not recognized. No solutions available."

# Function to make a classification prediction for a specific country and provide solutions
def predict_confidence_for_country_with_solution(country_name):
    """Print a country's predicted confidence label and the matching advice.

    Args:
        country_name: Country to look up via `get_country_classification_data`.
    """
    features, encoder = get_country_classification_data(country_name)

    # Most probable class of the first row, decoded back to its text label.
    top_class = clf_model.predict(features).argmax(axis=1)[0]
    decoded = encoder.inverse_transform([top_class])
    confidence_level = decoded[0]

    # Report the label, then the recommendations written for that label.
    print(f"Predicted confidence estimate for {country_name}: {confidence_level}")
    print(suggest_solutions(confidence_level))

# Run the prediction for the country name entered at the prompt above
predict_confidence_for_country_with_solution(x)


Enter Country according to dataset::Japan
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Predicted confidence estimate for Japan: Medium Confidence

        Suggested Solutions for Medium Confidence:
        1. Review Current Data Collection Processes: Ensure that current methods are comprehensive and consider seasonal or regional variations.
        2. Focus on Outliers: Look at the data to identify any unusual trends or outliers that might reduce confidence in the estimate.
        3. Training: Provide training to businesses and organizations to improve the accuracy of their waste reporting.
        4. Partner with NGOs: Partner with organizations that specialize in food waste reduction to enhance data quality.
        
