In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import csv
from scipy.stats import skew

# 1. Load the data
# `data` is the module-level DataFrame read by the analysis functions below.
data = pd.read_csv("water_consumption.csv")

# Inspect the structure of the data
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
data.info()

# 2. Barplot Representando o Consumo Mensal de Água
def plot_monthly_water_consumption(year, start_month, end_month, park_id):
    """Display a bar chart of one park's total water consumption per month.

    Rows are taken from the module-level ``data`` frame and restricted to
    the given year, the inclusive month range and the given park; the
    "Consumption" column is then summed per "Month".

    Args:
        year: Value matched against the "Year" column.
        start_month: First month of the range (inclusive).
        end_month: Last month of the range (inclusive).
        park_id: Value matched against the "Park Identification" column.
    """
    # Build the row selector one condition at a time.
    same_year = data["Year"] == year
    within_months = data["Month"].between(start_month, end_month)
    same_park = data["Park Identification"] == park_id
    selection = data[same_year & within_months & same_park]

    # One total per month that actually has records.
    totals_by_month = selection.groupby("Month")["Consumption"].sum()

    plt.figure(figsize=(10, 6))
    plt.bar(totals_by_month.index, totals_by_month.values, color='skyblue')
    plt.xlabel("Month")
    plt.ylabel("Water Consumption (m3)")
    plt.title(f"Monthly Water Consumption - Park {park_id} ({year})")
    # Tick every month in the requested range, even those without a bar.
    plt.xticks(np.arange(start_month, end_month + 1))
    plt.grid(True)
    plt.show()

# Smoke test for plot_monthly_water_consumption: all of 2023 for park "A"
plot_monthly_water_consumption(year=2023, start_month=1, end_month=12, park_id="A")

# 2. Barplot Representando o Consumo Mensal de Água (continuação)

# 3. Cálculo da Média Mensal dos Custos de Consumo de Água
def average_monthly_water_cost(num_parks, park_id, *, price_per_m3=0.7,
                               surcharge_threshold=1000, surcharge_rate=0.15,
                               dataset=None):
    """Return the mean monthly water cost for one park.

    Consumption is summed per (Year, Month). Each monthly total is billed at
    ``price_per_m3``; the part of the total above ``surcharge_threshold`` is
    additionally billed at ``price_per_m3 * surcharge_rate``. With the
    defaults this is the 0.7 AC/m3 tariff with a 15% surcharge on the
    consumption exceeding 1000 m3.

    Args:
        num_parks: Not used by the calculation; kept so existing calls
            keep working.
        park_id: Value matched against the "Park Identification" column.
        price_per_m3: Base price per unit of consumption.
        surcharge_threshold: Monthly consumption above which the surcharge
            applies.
        surcharge_rate: Fraction of the base price added for the excess.
        dataset: DataFrame with "Park Identification", "Year", "Month" and
            "Consumption" columns. Defaults to the module-level ``data``.

    Returns:
        The mean of the monthly costs, or NaN when the park has no rows.
    """
    records = data if dataset is None else dataset

    monthly_consumption = (
        records[records["Park Identification"] == park_id]
        .groupby(["Year", "Month"])["Consumption"]
        .sum()
    )

    base_cost = monthly_consumption * price_per_m3
    # Only the portion above the threshold is surcharged; clipping at zero
    # makes the surcharge vanish for months at or below the threshold.
    excess = (monthly_consumption - surcharge_threshold).clip(lower=0)
    monthly_cost = base_cost + excess * price_per_m3 * surcharge_rate

    return monthly_cost.mean()

# Smoke test for average_monthly_water_cost: report the figure for park "A"
average_cost = average_monthly_water_cost(num_parks=2, park_id="A")
print(f"Average Monthly Cost for Park A: {average_cost:.2f} AC")

# 4. Análise Estatística entre o Parque com Maior e Menor Consumo de Água
def statistical_analysis(park_id_1, park_id_2):
    # Filtrar os dados para o parque com maior consumo de água
    park_data_1 = data[data["Park Identification"] == park_id_1]
    park_data_1_grouped = park_data_1.groupby(["Year", "Month"])["Consumption"].sum().reset_index()
    park_data_1["Monthly Cost"] = park_data_1_grouped["Consumption"] * 0.7
    park_data_1["Monthly Cost"] = np.where(park_data_1_grouped["Consumption"] > 1000,
                                           park_data_1["Monthly Cost"] + (park_data_1_grouped["Consumption"] - 1000) * 0.7 * 0.15,
                                           park_data_1["Monthly Cost"])

    # Filtrar os dados para o parque com menor consumo de água
    park_data_2 = data[data["Park Identification"] == park_id_2]
    park_data_2_grouped = park_data_2.groupby(["Year", "Month"])["Consumption"].sum().reset_index()
    park_data_2["Monthly Cost"] = park_data_2_grouped["Consumption"] * 0.7
    park_data_2["Monthly Cost"] = np.where(park_data_2_grouped["Consumption"] > 1000,
                                           park_data_2["Monthly Cost"] + (park_data_2_grouped["Consumption"] - 1000) * 0.7 * 0.15,
                                           park_data_2["Monthly Cost"])

    # Calcular as estatísticas para o parque com maior consumo de água
    mean_1 = park_data_1["Monthly Cost"].mean()
    median_1 = park_data_1["Monthly Cost"].median()
    std_deviation_1 = park_data_1["Monthly Cost"].std()
    skewness_1 = skew(park_data_1["Monthly Cost"])

    # Calcular as estatísticas para o parque com menor consumo de água
    mean_2 = park_data_2["Monthly Cost"].mean()
    median_2 = park_data_2["Monthly Cost"].median()
    std_deviation_2 = park_data_2["Monthly Cost"].std()
    skewness_2 = skew(park_data_2["Monthly Cost"])

    # Construir tabelas de frequência relativa e absoluta (dados classificados), considerando 5 classes
    bins = pd.cut(park_data_1["Monthly Cost"], bins=5, retbins=True)[1]
    frequency_table_1 = pd.cut(park_data_1["Monthly Cost"], bins=bins, include_lowest=True).value_counts().sort_index()
    relative_frequency_table_1 = (frequency_table_1 / frequency_table_1.sum()) * 100

    bins = pd.cut(park_data_2["Monthly Cost"], bins=5, retbins=True)[1]
    frequency_table_2 = pd.cut(park_data_2["Monthly Cost"], bins=bins, include_lowest=True).value_counts().sort_index()
    relative_frequency_table_2 = (frequency_table_2 / frequency_table_2.sum()) * 100

    # Verificar outliers
    Q1_1 = park_data_1["Monthly Cost"].quantile(0.25)
    Q3_1 = park_data_1["Monthly Cost"].quantile(0.75)
    IQR_1 = Q3_1 - Q1_1
    lower_bound_1 = Q1_1 - 1.5 * IQR_1
    upper_bound_1 = Q3_1 + 1.5 * IQR_1
    outliers_1 = park_data_1[(park_data_1["Monthly Cost"] < lower_bound_1) | (park_data_1["Monthly Cost"] > upper_bound_1)]

    Q1_2 = park_data_2["Monthly Cost"].quantile(0.25)
    Q3_2 = park_data_2["Monthly Cost"].quantile(0.75)
    IQR_2 = Q3_2 - Q1_2
    lower_bound_2 = Q1_2 - 1.5 * IQR_2
    upper_bound_2 = Q3_2 + 1.5 * IQR_2
    outliers_2 = park_data_2[(park_data_2["Monthly Cost"] < lower_bound_2) | (park_data_2["Monthly Cost"] > upper_bound_2)]

    # Imprimir resultados
    print(f"\nStatistics for Park {park_id_1}:")
    print(f"Mean: {mean_1:.2f} AC")
    print(f"Median: {median_1:.2f} AC")
    print(f"Standard Deviation: {std_deviation_1:.2f} AC")
    print(f"Skewness: {skewness_1:.2f}")
    print("\nFrequency Table:")
    print(frequency_table_1)
    print("\nRelative Frequency Table:")
    print(relative_frequency_table_1)
    print("\nOutliers:")
    print(outliers_1)

    print(f"\nStatistics for Park {park_id_2}:")
    print(f"Mean: {mean_2:.2f} AC")
    print(f"Median: {median_2:.2f} AC")
    print(f"Standard Deviation: {std_deviation_2:.2f} AC")
    print(f"Skewness: {skewness_2:.2f



SyntaxError: unterminated string literal (detected at line 128) (2826133506.py, line 128)