# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Water consumed (liters) by one unit of each tracked activity. The unit is a
# minute for the timed activities (showering, gardening) and one occurrence
# for the counted ones, matching typical_duration_or_frequency below.
water_usage_per_unit = {
    'Showering': 7.9,
    'Dishwashing': 15,
    'Laundry': 50,
    'Swimming': 50000,
    'Car Washing': 100,
    'Gardening': 20,
}

# Activity names, in the insertion order of the table above.
activities = list(water_usage_per_unit)

# Typical amount of each activity; note that the time base differs per entry.
typical_duration_or_frequency = {
    'Showering': 10,    # Minutes per day
    'Dishwashing': 1,   # Times per day
    'Laundry': 3,       # Loads per week
    'Swimming': 1,      # Pool use per month
    'Car Washing': 1,   # Times per month
    'Gardening': 15,    # Minutes per day
}

# Number of samples (users)
num_samples = 100

# Activities whose consumption scales with how often they happen; every other
# activity (showering, gardening) scales with the minutes spent instead.
# Built once here rather than on every loop iteration.
frequency_based_activities = {'Dishwashing', 'Laundry', 'Swimming', 'Car Washing'}

# Generate random data for baseline training
data = []
for _ in range(num_samples):
    activity = np.random.choice(activities)
    # randint's upper bound is exclusive: (1, 31) yields 1..30 inclusive.
    frequency = np.random.randint(1, 31)
    # Likewise (1, 121) is required to cover the intended 1..120 minutes;
    # the previous (1, 120) could never produce 120.
    time_spent = np.random.randint(1, 121)
    # Both columns are always drawn, but only one of them drives water_used.
    driver = frequency if activity in frequency_based_activities else time_spent
    water_used = water_usage_per_unit[activity] * driver
    data.append({
        'activity': activity,
        'time_spent': time_spent,
        'frequency': frequency,
        'water_used': water_used
    })

# Create DataFrame for generated data
df = pd.DataFrame(data)

# Simulated user input for tracking
user_data = {
    'Showering': 30,  # 30 minutes per day
    # Counted as 7 times per day by the calculation below.
    # NOTE(review): this entry was previously annotated "5 times per week",
    # which matches neither the value nor the per-day treatment - confirm the
    # intended value and unit.
    'Dishwashing': 7,
    'Laundry': 3,  # 3 loads per week
    'Car Washing': 1,  # 1 time per month
    'Gardening': 15  # 15 minutes per day
}

# A month is approximated as 30 days throughout this script.
days_per_month = 30

# Number of reporting periods per month for each activity, so that every
# entry in user_data can be converted to a monthly figure.
periods_per_month = {
    'Showering': days_per_month,     # reported per day
    'Dishwashing': days_per_month,   # reported per day
    'Gardening': days_per_month,     # reported per day
    'Laundry': days_per_month / 7,   # reported per week (was summed unscaled)
    'Swimming': 1,                   # reported per month
    'Car Washing': 1,                # reported per month
}

# Calculate personalized total water usage (liters per month)
total_water_usage = 0
for activity, frequency in user_data.items():
    # Activities without a known rate or time base are ignored, not fatal.
    if activity in water_usage_per_unit and activity in periods_per_month:
        total_water_usage += (
            water_usage_per_unit[activity] * frequency * periods_per_month[activity]
        )

# One decimal place keeps the output tidy now that weekly activities
# contribute fractional amounts.
print(f"Total Water Usage: {total_water_usage:.1f} liters")

# Machine Learning Model
# Baseline linear regression of water_used on the two numeric columns.
# NOTE(review): 'activity' is not part of the feature set even though
# water_used is generated from an activity-specific rate, so the fit can only
# capture an average trend - confirm whether that is intended.
feature_columns = ['time_spent', 'frequency']
X = df[feature_columns]
y = df['water_used']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split itself repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Function to provide personalized tips
def generate_tips(user_data, total_usage):
    """Return water-saving tips triggered by the user's reported activity levels.

    Args:
        user_data: Mapping of activity name to the reported amount for that
            activity. Activities missing from the mapping are treated as 0,
            so a partial mapping never raises ``KeyError``.
        total_usage: Overall usage figure. Accepted so existing callers keep
            working; none of the current rules consult it.

    Returns:
        A list of tip strings, in a fixed order (showering, dishwashing,
        laundry, car washing, gardening). Empty when no rule is triggered.
    """
    shower_liters_per_minute = 7.9
    days_per_month = 30
    monthly_shower_limit_liters = 1000

    def reported(activity):
        # A user who does not track an activity simply gets no tip for it.
        return user_data.get(activity, 0)

    tips = []
    # Showering is the only rule based on estimated monthly liters; the
    # remaining rules compare the raw reported amount against a threshold.
    monthly_shower_liters = reported('Showering') * shower_liters_per_minute * days_per_month
    if monthly_shower_liters > monthly_shower_limit_liters:
        tips.append("Reduce shower time by 2-3 minutes.")
    if reported('Dishwashing') > 2:
        tips.append("Use the dishwasher only when full.")
    if reported('Laundry') > 3:
        tips.append("Wash full loads only.")
    if reported('Car Washing') > 1:
        tips.append("Reduce car washing frequency.")
    if reported('Gardening') > 10:
        tips.append("Consider efficient irrigation.")
    return tips

# Tips based on user behavior
tips = generate_tips(user_data, total_water_usage)

# Print the heading followed by one "- tip" bullet per line in a single call;
# when there are no tips only the heading is printed.
print(
    "\nPractical Tips to Reduce Your Water Footprint:",
    *(f"- {tip}" for tip in tips),
    sep="\n",
)

# Visualization of Total Water Usage
def plot_total_usage():
    """Show a bar chart of the user's estimated monthly water usage per activity.

    Reads the module-level ``user_data`` and ``water_usage_per_unit``. Each
    reported amount is converted to a monthly figure before plotting, so the
    bars match the chart title: daily activities are multiplied by 30 and
    weekly ones by 30/7. Activities without a known rate or time base are
    left out of the chart.
    """
    days_per_month = 30
    # Reporting periods per month, following the time bases noted next to
    # typical_duration_or_frequency.
    periods_per_month = {
        'Showering': days_per_month,
        'Dishwashing': days_per_month,
        'Gardening': days_per_month,
        'Laundry': days_per_month / 7,
        'Swimming': 1,
        'Car Washing': 1,
    }

    tracked = [
        activity
        for activity in user_data
        if activity in water_usage_per_unit and activity in periods_per_month
    ]
    monthly_liters = [
        water_usage_per_unit[activity] * user_data[activity] * periods_per_month[activity]
        for activity in tracked
    ]

    plt.bar(tracked, monthly_liters)
    plt.title("User's Monthly Water Usage by Activity")
    plt.xlabel('Activity')
    plt.ylabel('Water Usage (liters)')
    plt.show()

plot_total_usage()
