In [4]:
import pandas as pd

# Read the raw product table
data = pd.read_csv('data.csv')

# Letter grades mapped onto an ordinal scale (larger number = better grade)
energy_efficiency_mapping = {'A+': 3, 'A': 2, 'A-': 1}

# Cleaning pipeline, applied in this order:
#   1. Missing values - literal "N/A" cells become pd.NA. Rows are kept as they
#      are; chain .fillna("Unknown") or .dropna() here if a different policy is
#      wanted.
#   2. Units          - assumed to be standardised already, so nothing is done.
#   3. Dtypes         - left untouched; cast a column (for example
#      Carbon_Footprint with .astype(float)) if it arrives as text.
#   4. Encoding       - Energy_Efficiency grades are replaced by their number
#      from the mapping; grades that are not in the mapping come out as NaN.
#   5. Duplicates     - exact duplicate rows are dropped.
data = (
    data
    .replace("N/A", pd.NA)
    .assign(Energy_Efficiency=lambda frame: frame['Energy_Efficiency'].map(energy_efficiency_mapping))
    .drop_duplicates()
)

# Persist the cleaned table for the next stage of the notebook
data.to_csv('cleaned_eco_products.csv', index=False)


In [6]:
import pandas as pd

# Load the cleaned data
data = pd.read_csv('cleaned_eco_products.csv')

# Convert 'Water_Usage' to a numerical format (assuming all values are in Liters).
# Text such as "30.0 Liters" is reduced to its first number. A column that
# read_csv already parsed as numeric is left alone, because the .str accessor
# raises on non-string data.
if not pd.api.types.is_numeric_dtype(data['Water_Usage']):
    data['Water_Usage'] = (
        data['Water_Usage']
        # raw string: '\d' in a normal string literal is an invalid escape sequence
        # expand=False returns a Series rather than a one-column DataFrame
        .str.extract(r'(\d+\.?\d*)', expand=False)
        .astype(float)
    )

# Imputation.
# The results are assigned back to the column instead of calling
# fillna(..., inplace=True) on data[col]: that form mutates a temporary object,
# which pandas warns about and which has no effect under Copy-on-Write.

# Numerical imputation: fill missing values in 'Water_Usage' with the column median
data['Water_Usage'] = data['Water_Usage'].fillna(data['Water_Usage'].median())

# Categorical imputation: fill missing values in 'Energy_Efficiency' with the
# placeholder 'Unknown'.
# NOTE(review): this mixes text into a numeric column, so once the file is
# written and read back the numeric grades arrive as strings such as "3.0".
data['Energy_Efficiency'] = data['Energy_Efficiency'].fillna('Unknown')

# Alternatively, removal instead of imputation:
# data = data.dropna(subset=['Energy_Efficiency', 'Water_Usage'])

# Save the updated data back to a CSV file
data.to_csv('updated_cleaned_eco_products.csv', index=False)


In [15]:
import pandas as pd

# Product table that the sustainability labels are derived from
df = pd.read_csv(filepath_or_buffer='Final_data.csv')


def label_products(row):
    """Build the comma-separated sustainability label string for one product.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row passed by ``df.apply(..., axis=1)``)
        Must provide the keys 'Energy_Efficiency', 'Material', 'Recyclable',
        'Water_Usage' and 'Eco_Certification'.

    Returns
    -------
    str
        The applicable labels joined with ', ' in a fixed order (energy,
        material, recyclable, water, certification). An empty string is
        returned when no label applies.
    """
    labels = []

    # Energy Efficiency Labeling.
    # The grade may arrive as a number (3.0) or, when the column also holds
    # the text 'Unknown', as a numeric string ("3.0"); accept both forms.
    efficiency = row['Energy_Efficiency']
    try:
        efficiency_score = float(efficiency)
    except (TypeError, ValueError):
        efficiency_score = None  # 'Unknown', pd.NA or other non-numeric value

    if efficiency_score == 3.0:
        labels.append('Highly Energy Efficient')
    elif efficiency_score == 2.0:
        labels.append('Moderately Energy Efficient')
    elif efficiency_score == 1.0:
        labels.append('Low Energy Efficiency')
    elif efficiency == 'Unknown':
        labels.append('Energy Efficiency Unknown')

    # Material based labeling
    if row['Material'] in ['Bamboo', 'Recycled']:
        labels.append('Sustainably Sourced')

    # Recyclable
    if row['Recyclable'] == 'Yes':
        labels.append('Recyclable')

    # Water usage: expects an already-numeric value; missing values get no label
    water_usage = row['Water_Usage']
    if pd.notna(water_usage) and water_usage < 50.0:
        labels.append('Water Efficient')

    # Eco Certification.
    # A missing value must not count as certified: NaN != 'None' is True, and
    # read_csv may itself turn the literal text "None" into NaN.
    certification = row['Eco_Certification']
    if pd.notna(certification) and str(certification).strip() not in ('', 'None'):
        labels.append('Certified Eco-Friendly')

    return ', '.join(labels)

# Label every product row.
# NOTE(review): this cell does not write df back to disk, while a later cell
# reads a 'Sustainability_Label' column from Final_data.csv - confirm the file
# already contains it.
df['Sustainability_Label'] = df.apply(label_products, axis='columns')




In [31]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF

# Load datasets
Final_data = pd.read_csv('Final_data.csv')
User_data = pd.read_csv('User_data.csv')

# A product with no labels is stored as an empty string, which read_csv loads
# back as NaN; normalise to '' so the substring checks below cannot raise.
product_labels = Final_data['Sustainability_Label'].fillna('').astype(str)

# Product-side half of every preference check, computed once per product
# instead of once per (user, product) pair. Keys are the User_data preference
# columns; values are 1.0 where the product satisfies that preference.
product_matches = {
    'Recyclable': (Final_data['Recyclable'] == 'Yes').to_numpy(dtype=float),
    # Assuming these certifications are related to energy efficiency
    'Energy Efficient': Final_data['Eco_Certification'].isin(['ENERGY STAR', 'Low VOC']).to_numpy(dtype=float),
    'Water Efficient': product_labels.str.contains('Water Efficient', regex=False).to_numpy(dtype=float),
    'Certified Eco-Friendly': product_labels.str.contains('Certified Eco-Friendly', regex=False).to_numpy(dtype=float),
}

# Fill the interaction scores: one row per user, one column per product,
# each cell counting how many of the user's preferences the product satisfies.
# NOTE(review): preferences are tested by truthiness, so they are assumed to be
# booleans or 0/1 flags - text such as 'No' would count as a preference.
scores = np.zeros((len(User_data), len(Final_data)))
for position, (_, user_row) in enumerate(User_data.iterrows()):
    for preference, matches in product_matches.items():
        if user_row[preference]:
            scores[position] += matches

interaction_matrix = pd.DataFrame(scores, columns=Final_data['Product_ID'], index=User_data['User_ID'])

# Apply matrix factorization using NMF (seeded so the factors are reproducible)
nmf = NMF(n_components=3, random_state=42)
user_matrix = nmf.fit_transform(interaction_matrix)
product_matrix = nmf.components_.T

# If you need the resultant matrices saved as CSV
pd.DataFrame(user_matrix, index=User_data['User_ID']).to_csv('user_matrix.csv')
pd.DataFrame(product_matrix, index=Final_data['Product_ID']).to_csv('product_matrix.csv')




In [34]:
def recommend_products_for_all_users(user_matrix, product_matrix, product_data, top_n=5, user_ids=None):
    """Recommend the highest-scoring products for every user.

    Parameters
    ----------
    user_matrix : 2-D array, one row of latent features per user.
    product_matrix : 2-D array, one row of latent features per product, in the
        same row order as ``product_data``.
    product_data : DataFrame with a 'Product_Name' column.
    top_n : int, maximum number of products recommended per user.
    user_ids : optional sequence with one identifier per row of
        ``user_matrix``. When omitted, users are numbered 1..n by position.

    Returns
    -------
    list of lists
        One ``[user_id, name_1, ..., name_k]`` entry per user, best match
        first, where k is ``top_n`` or the number of products if smaller.

    Raises
    ------
    ValueError
        If ``user_ids`` is given but its length differs from the number of
        rows in ``user_matrix``.

    Note: a later cell of this notebook defines a function with the same name
    that returns a different shape; whichever cell ran last wins.
    """
    n_users = user_matrix.shape[0]
    if user_ids is None:
        # Fall back to positional numbering starting at 1
        user_ids = range(1, n_users + 1)
    else:
        user_ids = list(user_ids)
        if len(user_ids) != n_users:
            raise ValueError('user_ids must contain one entry per row of user_matrix')

    product_names = product_data['Product_Name'].values
    all_recommendations = []

    for user_id, user_vector in zip(user_ids, user_matrix):
        # Predicted score of this user for every product
        scores = np.dot(user_vector, product_matrix.T)

        # Positions of the products sorted by descending score, cut to top_n
        top_product_indices = scores.argsort()[::-1][:top_n]

        all_recommendations.append([user_id] + list(product_names[top_product_indices]))

    return all_recommendations

# Load the latent feature matrices from CSV (if saved).
# The user frame is kept because its index holds the User_ID values that were
# written alongside the factors.
user_factors = pd.read_csv('user_matrix.csv', index_col=0)
user_matrix = user_factors.values
product_matrix = pd.read_csv('product_matrix.csv', index_col=0).values

# Get recommendations for all users
all_user_recommendations = recommend_products_for_all_users(user_matrix, product_matrix, Final_data)

# One column per recommendation actually returned; this is fewer than 5 when
# the catalogue holds fewer than 5 products.
n_recommended = len(all_user_recommendations[0]) - 1 if all_user_recommendations else 5
recommendation_columns = ['User_ID'] + [f'Rec_Product_{rank}' for rank in range(1, n_recommended + 1)]

# Convert recommendations to DataFrame and save to CSV
recommendations_df = pd.DataFrame(all_user_recommendations, columns=recommendation_columns)
# Use the stored User_ID values instead of assuming they run 1..n by position
recommendations_df['User_ID'] = user_factors.index.to_numpy()
recommendations_df.to_csv('user_recommendations1.csv', index=False)


In [35]:
def recommend_products_for_all_users(user_matrix, product_matrix, product_data, top_n=5, user_ids=None):
    """Recommend the highest-scoring products for every user as one string each.

    Parameters
    ----------
    user_matrix : 2-D array, one row of latent features per user.
    product_matrix : 2-D array, one row of latent features per product, in the
        same row order as ``product_data``.
    product_data : DataFrame with a 'Product_Name' column.
    top_n : int, maximum number of products recommended per user.
    user_ids : optional sequence with one identifier per row of
        ``user_matrix``. When omitted, users are numbered 1..n by position.

    Returns
    -------
    list of lists
        One ``[user_id, 'name_1, name_2, ...']`` entry per user, best match
        first in the joined string.

    Raises
    ------
    ValueError
        If ``user_ids`` is given but its length differs from the number of
        rows in ``user_matrix``.

    Note: this redefines a function of the same name from an earlier cell of
    this notebook (that one returns one list element per product).
    """
    n_users = user_matrix.shape[0]
    if user_ids is None:
        # Fall back to positional numbering starting at 1
        user_ids = range(1, n_users + 1)
    else:
        user_ids = list(user_ids)
        if len(user_ids) != n_users:
            raise ValueError('user_ids must contain one entry per row of user_matrix')

    product_names = product_data['Product_Name'].values
    all_recommendations = []

    for user_id, user_vector in zip(user_ids, user_matrix):
        # Predicted score of this user for every product
        scores = np.dot(user_vector, product_matrix.T)

        # Positions of the products sorted by descending score, cut to top_n
        top_product_indices = scores.argsort()[::-1][:top_n]

        # str() guards the join against non-string names (numbers, missing values)
        recommended_products_str = ', '.join(str(name) for name in product_names[top_product_indices])

        all_recommendations.append([user_id, recommended_products_str])

    return all_recommendations

# Load the latent feature matrices from CSV (if saved).
# The user frame is kept because its index holds the User_ID values that were
# written alongside the factors.
user_factors = pd.read_csv('user_matrix.csv', index_col=0)
user_matrix = user_factors.values
product_matrix = pd.read_csv('product_matrix.csv', index_col=0).values

# Get recommendations for all users
all_user_recommendations = recommend_products_for_all_users(user_matrix, product_matrix, Final_data)

# Convert recommendations to DataFrame and save to CSV
recommendations_df = pd.DataFrame(all_user_recommendations, columns=['User_ID', 'Recommended_Products'])
# Use the stored User_ID values instead of assuming they run 1..n by position
recommendations_df['User_ID'] = user_factors.index.to_numpy()
recommendations_df.to_csv('user_recommendations2.csv', index=False)
