In [None]:
###  EDA   ANALYSIS

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def _require_columns(data, columns):
    """Raise a single KeyError naming every entry of *columns* absent from *data*."""
    missing = [column for column in columns if column not in data.columns]
    if missing:
        raise KeyError(f"EDA input is missing required columns: {missing}")


def _plot_time_series(data, columns, ylabel, title):
    """Plot the given columns of *data* against 'Timestamp' on a new figure and show it."""
    # NOTE(review): if 'Timestamp' holds strings rather than datetimes,
    # matplotlib will treat every value as a category -- confirm the caller
    # parses the column (e.g. with pd.to_datetime) before calling.
    plt.figure(figsize=(12, 6))
    # A legend is only useful when several series share the axes.
    show_legend = len(columns) > 1
    for column in columns:
        if show_legend:
            plt.plot(data['Timestamp'], data[column], label=column)
        else:
            plt.plot(data['Timestamp'], data[column])
    plt.xlabel('Timestamp')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.xticks(rotation=45)
    if show_legend:
        plt.legend()
    plt.show()


def perform_eda_analysis(data: pd.DataFrame) -> None:
    """Print summary tables and show exploratory plots for *data*.

    The function prints ``describe()`` and per-column null counts, then shows,
    in order: GHI over time, a correlation heatmap, wind speed over time, a
    module/ambient temperature comparison, histograms, box plots and a
    pair plot. Nothing is returned and *data* is not modified.

    Args:
        data: Frame containing the columns 'Timestamp', 'GHI', 'DNI', 'DHI',
            'Tamb', 'TModA', 'TModB' and 'WS'.

    Raises:
        KeyError: If any of the columns listed above is missing. The check
            runs after the text summaries and before the first figure, so no
            plot is drawn for an incomplete frame.
    """
    # Summary Statistics
    print("Summary Statistics:")
    print(data.describe())
    print()

    # Data Quality Check
    print("Data Quality Check:")
    print("Missing Values:")
    print(data.isnull().sum())
    print()

    # Fail once, with the full list of missing columns, instead of part-way
    # through the plots with only the first missing name.
    _require_columns(
        data,
        ('Timestamp', 'GHI', 'DNI', 'DHI', 'Tamb', 'TModA', 'TModB', 'WS'),
    )

    # Time Series Analysis
    print("Time Series Analysis:")
    _plot_time_series(data, ['GHI'], ylabel='GHI', title='GHI Over Time')
    # Repeat for other variables like DNI, DHI, Tamb

    # Correlation Analysis
    print("Correlation Analysis:")
    correlation_matrix = data[['GHI', 'DHI', 'DNI', 'TModA', 'TModB']].corr()
    # Draw on a dedicated axes: relying on the implicit "current axes" would
    # paint over the previous figure on backends where plt.show() does not
    # close it.
    _, heatmap_ax = plt.subplots()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=heatmap_ax)
    heatmap_ax.set_title('Correlation Matrix')
    plt.show()

    # Wind Analysis
    print("Wind Analysis:")
    _plot_time_series(
        data, ['WS'], ylabel='Wind Speed', title='Wind Speed Over Time'
    )
    # Repeat for other wind variables like WSgust, WSstdev, WD, WDstdev

    # Temperature Analysis
    print("Temperature Analysis:")
    # Compare module temperatures (TModA, TModB) with ambient temperature (Tamb)
    _plot_time_series(
        data,
        ['TModA', 'TModB', 'Tamb'],
        ylabel='Temperature',
        title='Temperature Comparison',
    )

    # Histograms
    print("Histograms:")
    data[['GHI', 'DNI', 'DHI', 'WS', 'Tamb']].hist(bins=20, figsize=(12, 6))
    plt.tight_layout()
    plt.show()

    # Box Plots
    print("Box Plots:")
    # Same reasoning as the heatmap: give the box plot its own axes so it
    # never lands on one of the histogram subplots.
    _, box_ax = plt.subplots()
    data[['GHI', 'DNI', 'DHI', 'Tamb']].boxplot(ax=box_ax)
    box_ax.set_title('Box Plots')
    plt.show()

    # Scatter Plots
    print("Scatter Plots:")
    sns.pairplot(data[['GHI', 'Tamb', 'WS']])
    plt.show()

    # Data Cleaning
    print("Data Cleaning:")
    # Handle anomalies and missing values, e.g., drop columns with all null values like 'Comments'
    # (no cleaning is performed here yet; only the section header is printed)

    print("EDA analysis completed successfully!")
    
