# Exploratory Data Analysis (EDA) for 35117010_MetaRiver Station
Adaptation by D.A. Gómez-Latorre.<br>
November 12, 2025.<br>
Variables: Precipitation, Temperature (min/max) and Flow<br> 
Period: 2000-2015<br>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from datetime import datetime
import warnings
# Install a global "ignore" filter: no warnings are shown after this point
warnings.filterwarnings('ignore')

# Display settings
# Matplotlib style sheet and seaborn colour palette used as plotting defaults
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
# Default figure size (width, height) and base font size; the plotting
# functions below pass their own figsize= when creating figures
plt.rcParams['figure.figsize'] = (15, 10)
plt.rcParams['font.size'] = 10

In [2]:
# ============================================================================
# 1. Data loading and preparation
# ============================================================================

def data_load(filepath):
    """
    Read the station file and return it as a DataFrame sorted by date.

    The file is parsed as tab-separated text with its first line skipped.
    The six resulting columns are renamed to Date, Hour, Precip_mm,
    Temp_min_C, Temp_max_C and Flow_m3s; 'Date' is parsed as day.month.year
    and copied into an extra 'DateTime' column. A short load report is
    printed before the frame is returned.
    """
    banner = "="*80
    print(banner)
    print("1. LOAD DATA")
    print(banner)

    column_names = ['Date', 'Hour', 'Precip_mm', 'Temp_min_C', 'Temp_max_C', 'Flow_m3s']

    # Tab-delimited input; line 1 is skipped and the header row that follows
    # is replaced by the short column names above
    raw = pd.read_csv(filepath, sep='\t', skiprows=1)
    raw.columns = column_names

    # Parse the date text, then mirror it into the DateTime column
    raw['Date'] = pd.to_datetime(raw['Date'], format='%d.%m.%Y')
    raw['DateTime'] = raw['Date']

    # Chronological order with a fresh 0..n-1 index
    df = raw.sort_values('Date').reset_index(drop=True)

    first_day = df['Date'].min()
    last_day = df['Date'].max()

    print(f"\n✓ Data successfully uploaded")
    print(f"  - Total records: {len(df)}")
    print(f"  - Period: {first_day.strftime('%Y-%m-%d')} a {last_day.strftime('%Y-%m-%d')}")
    print(f"  - Duration: {(last_day - first_day).days} days")

    return df

In [3]:
#============================================================================
# 2. Descriptive statistics
# ============================================================================

def descriptive_statistics(df):
    """
    Print descriptive statistics and IQR-based outlier counts.

    Works on the columns Precip_mm, Temp_min_C, Temp_max_C and Flow_m3s.
    Returns a DataFrame with one row per variable holding skewness,
    kurtosis, coefficient of variation (percent) and range, rounded to
    two decimals.
    """
    print("\n" + "="*80)
    print("2. DESCRIPTIVE STATISTICS")
    print("="*80)

    variables = ['Precip_mm', 'Temp_min_C', 'Temp_max_C', 'Flow_m3s']
    subset = df[variables]

    print("\n--- Basic Statistics ---")
    print(subset.describe().round(2))

    print("\n--- Additional Statistics ---")
    skewness = subset.skew()
    kurtosis = subset.kurtosis()
    cv_percent = subset.std() / subset.mean() * 100
    value_range = subset.max() - subset.min()
    additionals_stats = pd.DataFrame({
        'Asimetry': skewness,
        'Curtosis': kurtosis,
        'CV (%)': cv_percent,
        'Range': value_range
    }).round(2)
    print(additionals_stats)

    print("\n--- Extrem Values ---")
    total_rows = len(df)
    for var in variables:
        series = df[var]
        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)
        iqr = q3 - q1
        # A value is flagged when it lies more than 1.5 IQR outside the quartiles
        flagged = (series < q1 - 1.5*iqr) | (series > q3 + 1.5*iqr)
        n_outliers = int(flagged.sum())
        print(f"{var}: {n_outliers} outliers ({n_outliers/total_rows*100:.2f}%)")

    return additionals_stats

In [4]:
# ============================================================================
# 3. Missing data analysis
# ============================================================================

def missing_data_analyst(df):
    """
    Report how many values are missing in each of the four variables.

    Prints the table and the number of rows with zero precipitation, then
    returns a DataFrame indexed by variable with the columns
    'Missing_values' (count) and 'Percentage' (of all rows, 2 decimals).
    """
    print("\n" + "="*80)
    print("3. MISSING DATA ANALYSIS")
    print("="*80)

    variables = ['Precip_mm', 'Temp_min_C', 'Temp_max_C', 'Flow_m3s']
    n_rows = len(df)

    null_counts = df[variables].isnull().sum()
    null_share = (null_counts / n_rows * 100).round(2)
    missing_data = pd.DataFrame({
        'Missing_values': null_counts,
        'Percentage': null_share
    })

    print("\n", missing_data)

    # Zero precipitation is a valid observation, so it is reported separately
    dry_days = (df['Precip_mm'] == 0).sum()
    print(f"\nDays without precipitation (P=0): {dry_days} ({dry_days/n_rows*100:.2f}%)")

    return missing_data

In [5]:
# ============================================================================
# 4. TEMPORAL ANALYSIS
# ============================================================================

def temporal_analyst(df):
    """
    Add calendar columns to df and print annual and monthly aggregates.

    The columns Year, Month, Month_name, Quarter and Day_year are derived
    from 'Date' and written onto the DataFrame that was passed in; that same
    object is returned.
    """
    print("\n" + "="*80)
    print("4. TEMPORAL ANALYST")
    print("="*80)

    # Calendar fields taken from the datetime accessor of 'Date'
    date_parts = df['Date'].dt
    df['Year'] = date_parts.year
    df['Month'] = date_parts.month
    df['Month_name'] = date_parts.month_name()
    df['Quarter'] = date_parts.quarter
    df['Day_year'] = date_parts.dayofyear

    # Per-year aggregates
    print("\n--- Annual Statistics ---")
    yearly_spec = {
        'Precip_mm': ['sum', 'mean', 'std'],
        'Flow_m3s': ['mean', 'max', 'min'],
        'Temp_max_C': 'mean'
    }
    annual = df.groupby('Year').agg(yearly_spec).round(2)
    print(annual)

    # Per-calendar-month means pooled over all years
    print("\n--- # Monthly statistics (average for all years) ---")
    monthly_columns = ['Precip_mm', 'Flow_m3s', 'Temp_min_C', 'Temp_max_C']
    monthly_spec = {column: 'mean' for column in monthly_columns}
    monthly = df.groupby('Month').agg(monthly_spec).round(2)
    print(monthly)

    return df

In [6]:
# ============================================================================
# 5. CORRELATION ANALYSIS
# ============================================================================

def correlation_analyst(df):
    """
    Compute and print the Pearson and Spearman correlation matrices.

    Both matrices cover Precip_mm, Temp_min_C, Temp_max_C and Flow_m3s and
    are returned unrounded as the tuple (pearson, spearman); only the
    printed copies are rounded to three decimals.
    """
    print("\n" + "="*80)
    print("5. CORRELATION ANALYST")
    print("="*80)

    variables = ['Precip_mm', 'Temp_min_C', 'Temp_max_C', 'Flow_m3s']
    subset = df[variables]

    # Pearson measures linear association, Spearman is rank-based
    headings = (
        ('pearson', "\n--- Pearson correlation---"),
        ('spearman', "\n--- Spearman correlation ---"),
    )
    matrices = {}
    for method, heading in headings:
        print(heading)
        matrices[method] = subset.corr(method=method)
        print(matrices[method].round(3))

    return matrices['pearson'], matrices['spearman']

In [7]:
# ============================================================================
# 6. VISUALIZATIONS
# ============================================================================

def create_visualizations(df, corr_pearson, output_folder='./'):
    """
    Create the seven exploratory figures and save them as 300-dpi PNG files.

    Files written into output_folder: fig1_temporal_series.png,
    fig2_distributions.png, fig3_boxplots.png, fig4_correlation.png,
    fig5_annual_cycle.png, fig6_scatter_plots.png, fig7_annual_trend.png.

    Parameters:
        df: DataFrame with a datetime 'Date' column and the columns
            Precip_mm, Temp_min_C, Temp_max_C and Flow_m3s. A '<var>_ma30'
            column (centered 30-row rolling mean) is added to it for each
            of the four variables.
        corr_pearson: 4x4 correlation matrix, rows/columns in the same
            order as the four variables.
        output_folder: directory for the PNG files; a trailing path
            separator is optional.

    Returns None. All matplotlib figures are closed before returning.
    """
    import os  # function-scope import: only needed to build output paths

    print("\n" + "="*80)
    print("6. GENERATION OF VISUALIZATIONS")
    print("="*80)

    variables = ['Precip_mm', 'Temp_min_C', 'Temp_max_C', 'Flow_m3s']
    labels = ['Precipitation (mm)', 'Minimum Temp. (°C)',
              'Maximum Temp. (°C)', 'Flow (m³/s)']

    # Grouping keys are derived from 'Date' here so the function works even
    # when no 'Month'/'Year' columns were added to df beforehand.
    month_key = df['Date'].dt.month
    year_key = df['Date'].dt.year

    def save_figure(fig, filename):
        # os.path.join supplies the separator when output_folder lacks one
        # (plain concatenation turned 'out' + 'fig1...' into 'outfig1...').
        fig.tight_layout()
        fig.savefig(os.path.join(output_folder, filename), dpi=300, bbox_inches='tight')

    # ----- FIGURE 1: Temporal series -----
    fig, axes = plt.subplots(4, 1, figsize=(16, 12))
    fig.suptitle('Temporal Series - Hyd35117010 Station', fontsize=16, fontweight='bold')

    for i, (var, label) in enumerate(zip(variables, labels)):
        axes[i].plot(df['Date'], df[var], linewidth=0.8, alpha=0.7)
        axes[i].set_ylabel(label, fontweight='bold')
        axes[i].grid(True, alpha=0.3)

        # Add 30-day moving average (stored on df, as before)
        df[f'{var}_ma30'] = df[var].rolling(window=30, center=True).mean()
        axes[i].plot(df['Date'], df[f'{var}_ma30'], color='red',
                    linewidth=2, label='30-day moving average')
        axes[i].legend(loc='upper right')

    axes[-1].set_xlabel('date', fontweight='bold')
    save_figure(fig, 'fig1_temporal_series.png')
    print("✓ Figure 1 saves: Temporal Series")

    # ----- FIGURE 2: Distributions -----
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Distribution of varaibles', fontsize=16, fontweight='bold')
    axes = axes.ravel()

    for i, (var, label) in enumerate(zip(variables, labels)):
        # Density-normalised histogram
        axes[i].hist(df[var], bins=50, alpha=0.6, edgecolor='black', density=True)

        # Normal curve with the sample mean and standard deviation, drawn
        # over the x-range the histogram produced
        mu, std = df[var].mean(), df[var].std()
        xmin, xmax = axes[i].get_xlim()
        x = np.linspace(xmin, xmax, 100)
        p = stats.norm.pdf(x, mu, std)
        axes[i].plot(x, p, 'r-', linewidth=2, label='Theoretic normal')

        axes[i].set_xlabel(label, fontweight='bold')
        axes[i].set_ylabel('Dense', fontweight='bold')
        axes[i].legend()
        axes[i].grid(True, alpha=0.3)

        # Summary statistics box in the upper-left corner
        axes[i].text(0.02, 0.98, f'Mean: {mu:.2f}\nStd: {std:.2f}\nAsimetry: {df[var].skew():.2f}',
                    transform=axes[i].transAxes, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5), fontsize=9)

    save_figure(fig, 'fig2_distributions.png')
    print("✓ Figure 2 saved: Distributions")

    # ----- FIGURE 3: Box plots -----
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Box Plots - Outliers Detection', fontsize=16, fontweight='bold')
    axes = axes.ravel()

    for i, (var, label) in enumerate(zip(variables, labels)):
        # Missing values are dropped so they cannot turn the quartiles into NaN
        axes[i].boxplot(df[var].dropna(), vert=True, patch_artist=True,
                       boxprops=dict(facecolor='lightblue', alpha=0.7),
                       medianprops=dict(color='red', linewidth=2))
        axes[i].set_ylabel(label, fontweight='bold')
        axes[i].grid(True, alpha=0.3, axis='y')

    save_figure(fig, 'fig3_boxplots.png')
    print("✓ Figura 3 saved: Box plots")

    # ----- FIGURE 4: Correlation Matrix -----
    fig, ax = plt.subplots(figsize=(10, 8))
    im = ax.imshow(corr_pearson, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)

    # Configuration ticks
    ax.set_xticks(np.arange(len(labels)))
    ax.set_yticks(np.arange(len(labels)))
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_yticklabels(labels)

    # Add values to cells
    for i in range(len(labels)):
        for j in range(len(labels)):
            ax.text(j, i, f'{corr_pearson.iloc[i, j]:.2f}',
                    ha="center", va="center", color="black", fontweight='bold')

    ax.set_title('Correlation Matrix (Pearson)', fontsize=14, fontweight='bold', pad=20)
    fig.colorbar(im, ax=ax, label='Correlation COeficiente')
    save_figure(fig, 'fig4_correlation.png')
    print("✓ Figure 4 saved: Correlation matrix")

    # ----- FIGURE 5: Annual Cycle -----
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Annual Cycle - Monthly Statistics', fontsize=16, fontweight='bold')
    axes = axes.ravel()

    names_month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    for i, (var, label) in enumerate(zip(variables, labels)):
        mensual_stats = df.groupby(month_key)[var].agg(['mean', 'std', 'min', 'max'])

        axes[i].plot(mensual_stats.index, mensual_stats['mean'],
                    marker='o', linewidth=2, markersize=8, label='Media')
        axes[i].fill_between(mensual_stats.index,
                            mensual_stats['mean'] - mensual_stats['std'],
                            mensual_stats['mean'] + mensual_stats['std'],
                            alpha=0.3, label='± 1 Std')

        axes[i].set_xlabel('Month', fontweight='bold')
        axes[i].set_ylabel(label, fontweight='bold')
        axes[i].set_xticks(range(1, 13))
        axes[i].set_xticklabels(names_month, rotation=45)
        axes[i].legend(loc='best')
        axes[i].grid(True, alpha=0.3)

    save_figure(fig, 'fig5_annual_cycle.png')
    print("✓ Figure 5 saved: Annual Cycle")

    # ----- FIGURE 6: Scatter Plots -----
    fig, axes = plt.subplots(2, 3, figsize=(16, 10))
    fig.suptitle('Relations between variables', fontsize=16, fontweight='bold')

    pairs = [
        ('Precip_mm', 'Flow_m3s'),
        ('Temp_max_C', 'Flow_m3s'),
        ('Temp_min_C', 'Temp_max_C'),
        ('Precip_mm', 'Temp_max_C'),
        ('Temp_min_C', 'Flow_m3s'),
        ('Precip_mm', 'Temp_min_C')
    ]

    axes = axes.ravel()

    for i, (var_x, var_y) in enumerate(pairs):
        # Scatter plot
        axes[i].scatter(df[var_x], df[var_y], alpha=0.3, s=10)

        # Trend line. Rows are dropped pairwise: dropping NaNs from x and y
        # separately would shift the pairs against each other or make the
        # two arrays differ in length.
        paired = df[[var_x, var_y]].dropna()
        z = np.polyfit(paired[var_x], paired[var_y], 1)
        p = np.poly1d(z)
        x_sorted = paired[var_x].sort_values()
        axes[i].plot(x_sorted, p(x_sorted),
                    "r--", linewidth=2, label='Tendency')

        # Correlation
        corr = df[[var_x, var_y]].corr().iloc[0, 1]
        axes[i].text(0.05, 0.95, f'r = {corr:.3f}',
                    transform=axes[i].transAxes, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.5))

        axes[i].set_xlabel(var_x.replace('_', ' '), fontweight='bold')
        axes[i].set_ylabel(var_y.replace('_', ' '), fontweight='bold')
        axes[i].grid(True, alpha=0.3)
        axes[i].legend()

    save_figure(fig, 'fig6_scatter_plots.png')
    print("✓ Figure 6 saved: Scatter plots")

    # ----- FIGURE 7: Annual Trend Analysis -----
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Annual Trend', fontsize=16, fontweight='bold')
    axes = axes.ravel()

    # Precipitation is totalled per year; the other variables are averaged
    anual_precip = df.groupby(year_key)['Precip_mm'].sum()
    anual_flow = df.groupby(year_key)['Flow_m3s'].mean()
    anual_tmin = df.groupby(year_key)['Temp_min_C'].mean()
    anual_tmax = df.groupby(year_key)['Temp_max_C'].mean()

    anual_data = [anual_precip, anual_tmin, anual_tmax, anual_flow]
    titles = ['Annual Total Precipitation (mm)', 'Mean Minimum Temperature (°C)',
               'Mean Maximum Temperature (°C)', 'Mean FLow (m³/s)']

    for i, (data, title) in enumerate(zip(anual_data, titles)):
        axes[i].plot(data.index, data.values, marker='o', linewidth=2, markersize=8)

        # Linear trend; the slope z[0] is in units per year
        z = np.polyfit(data.index, data.values, 1)
        p = np.poly1d(z)
        axes[i].plot(data.index, p(data.index), "r--", linewidth=2,
                    label=f'Trend: {z[0]:.3f}/year')

        axes[i].set_xlabel('year', fontweight='bold')
        axes[i].set_ylabel(title, fontweight='bold')
        axes[i].legend()
        axes[i].grid(True, alpha=0.3)

    save_figure(fig, 'fig7_annual_trend.png')
    print("✓ Figure 7 saved: Annual Trend")

    plt.close('all')
    print("\n✓ All successfully generated visualizations")

In [8]:
# ============================================================================
# 7. SEASONALITY ANALYSIS 
# ============================================================================

def seasonality_analysis(df, output_folder='./'):
    """
    Analyze seasonal patterns using multi-year monthly averages.

    For Precip_mm, Flow_m3s and Temp_max_C the seasonal component is the
    mean of each calendar month over all years, and the anomaly is the
    observed value minus that mean. One figure with a seasonal-cycle panel
    and an anomaly panel per variable is saved as fig8_seasonality.png, and
    the coefficient of variation of the twelve monthly means is printed.

    Parameters:
        df: DataFrame with a datetime 'Date' column and the three variables
            above. It is not modified; the work is done on a copy.
        output_folder: directory for the PNG file (default: current
            directory); a trailing path separator is optional.

    Returns None.
    """
    import os  # function-scope import: only needed to build the output path

    print("\n" + "="*80)
    print("7. SEASONALITY ANALYSIS")
    print("="*80)

    # Work on a copy so the caller's frame is left untouched
    df_copy = df.copy()
    df_copy['Year'] = df_copy['Date'].dt.year
    df_copy['Month'] = df_copy['Date'].dt.month

    # Seasonal component: the monthly mean broadcast back onto every row.
    # transform() keeps the rows in their existing order, which the anomaly
    # time series and its rolling mean below depend on; attaching the means
    # with an inner merge does not guarantee that on every pandas version.
    month_groups = df_copy.groupby('Month')
    for column in ('Precip_mm', 'Flow_m3s', 'Temp_max_C'):
        df_copy[f'{column}_seasonal'] = month_groups[column].transform('mean')

    # Anomalies (difference from the monthly average)
    df_copy['Precip_anomaly'] = df_copy['Precip_mm'] - df_copy['Precip_mm_seasonal']
    df_copy['Flow_anomaly'] = df_copy['Flow_m3s'] - df_copy['Flow_m3s_seasonal']
    df_copy['Temp_anomaly'] = df_copy['Temp_max_C'] - df_copy['Temp_max_C_seasonal']

    # Create visualization
    fig, axes = plt.subplots(3, 2, figsize=(16, 12))
    fig.suptitle('Seasonality Analysis', fontsize=16, fontweight='bold')

    variables = [
        ('Precip_mm', 'Precip_anomaly', 'Precipitation (mm)'),
        ('Flow_m3s', 'Flow_anomaly', 'Flow (m³/s)'),
        ('Temp_max_C', 'Temp_anomaly', 'Maximum Temperature (°C)')
    ]

    monthly_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    window = 30  # rows in the centered moving average of the anomalies

    for i, (var, anomaly_var, label) in enumerate(variables):
        # Left panel: Seasonal component (monthly average ± 1 std)
        monthly_mean = df_copy.groupby('Month')[var].mean()
        monthly_std = df_copy.groupby('Month')[var].std()

        axes[i, 0].plot(monthly_mean.index, monthly_mean.values,
                       marker='o', linewidth=2, markersize=8, color='blue')
        axes[i, 0].fill_between(monthly_mean.index,
                                monthly_mean - monthly_std,
                                monthly_mean + monthly_std,
                                alpha=0.3, color='blue')
        axes[i, 0].set_ylabel(label, fontweight='bold')
        axes[i, 0].set_xlabel('Month', fontweight='bold')
        axes[i, 0].set_xticks(range(1, 13))
        axes[i, 0].set_xticklabels(monthly_names, rotation=45)
        axes[i, 0].set_title('Seasonality Component', fontweight='bold')
        axes[i, 0].grid(True, alpha=0.3)

        # Right panel: Anomalies (time series)
        axes[i, 1].plot(df_copy['Date'], df_copy[anomaly_var],
                       linewidth=0.5, alpha=0.6, color='gray')
        axes[i, 1].axhline(y=0, color='red', linestyle='--', linewidth=2)

        # Moving average of anomalies
        rolling_mean = df_copy[anomaly_var].rolling(window=window, center=True).mean()
        axes[i, 1].plot(df_copy['Date'], rolling_mean,
                       color='darkred', linewidth=2, label=f'Moving Average {window}d')

        axes[i, 1].set_ylabel(f'Anomaly - {label}', fontweight='bold')
        axes[i, 1].set_xlabel('Date', fontweight='bold')
        axes[i, 1].set_title('Anomalys (Observed - Seasonal)', fontweight='bold')
        axes[i, 1].grid(True, alpha=0.3)
        axes[i, 1].legend(loc='upper right')

    fig.tight_layout()
    # os.path.join supplies the separator when output_folder lacks one
    fig.savefig(os.path.join(output_folder, 'fig8_seasonality.png'),
                dpi=300, bbox_inches='tight')
    print("✓ Figure 8 saved: Seasonality Analyst")

    # Print seasonality statistics
    print("\n--- Seasonality Index ---")
    print("(Coefficient of variation of the monthly average)")
    for var, _, label in variables:
        monthly_mean = df_copy.groupby('Month')[var].mean()
        seasonal_cv = (monthly_mean.std() / monthly_mean.mean()) * 100
        print(f"{label}: {seasonal_cv:.2f}%")

    plt.close(fig)

In [9]:
# ============================================================================
# 8. FINAL SUMMARY
# ============================================================================

def generate_summary(df, additional_stats, output_folder='./'):
    """
    Write a plain-text summary of the analysis to summary_EDA.txt.

    Parameters:
        df: DataFrame with a datetime 'Date' column and the columns
            Precip_mm, Temp_min_C, Temp_max_C and Flow_m3s.
        additional_stats: accepted for interface compatibility; it is not
            used when building the summary.
        output_folder: directory for summary_EDA.txt; a trailing path
            separator is optional.

    Returns None.
    """
    import os  # function-scope import: only needed to build the output path

    print("\n" + "="*80)
    print("8. SUMMARY ANALYST")
    print("="*80)

    # os.path.join supplies the separator when output_folder lacks one;
    # plain concatenation wrote e.g. 'outsummary_EDA.txt' for 'out'.
    summary_path = os.path.join(output_folder, 'summary_EDA.txt')

    first_day = df['Date'].min()
    last_day = df['Date'].max()

    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write("SUMMARY - EXPLORATORY DATA ANALYST\n")
        f.write("STATION: Hyd_35117010\n")
        f.write("="*80 + "\n\n")

        f.write(f"Analysis period: {first_day.strftime('%Y-%m-%d')} a {last_day.strftime('%Y-%m-%d')}\n")
        f.write(f"Number of records: {len(df)}\n")
        f.write(f"Duration: {(last_day - first_day).days} days\n\n")

        f.write("VARIABLES ANALYZED:\n")
        f.write("  - Precipitation (mm)\n")
        f.write("  - Minimum temperature (°C)\n")
        f.write("  - Maximum temperature (°C)\n")
        f.write("  - Flow (m³/s)\n\n")

        f.write("DESCRIPTIVE STATISTICS:\n")
        f.write(df[['Precip_mm', 'Temp_min_C', 'Temp_max_C', 'Flow_m3s']].describe().to_string())
        f.write("\n\n")

        f.write("MAIN FINDINGS:\n")
        f.write(f"  - Average daily precipitation: {df['Precip_mm'].mean():.2f} mm\n")
        f.write(f"  - Total precipitation for the period: {df['Precip_mm'].sum():.2f} mm\n")
        f.write(f"  - Average flow: {df['Flow_m3s'].mean():.2f} m³/s\n")
        f.write(f"  - Maximum recorded flow: {df['Flow_m3s'].max():.2f} m³/s\n")
        # Mean of the per-row difference between maximum and minimum temperature
        f.write(f"  - Average temperature range: {(df['Temp_max_C'] - df['Temp_min_C']).mean():.2f} °C\n\n")

        # Static list of the files the full pipeline is expected to produce
        f.write("FILES GENERATED:\n")
        f.write("  - fig1_temporal_series.png\n")
        f.write("  - fig2_distributions.png\n")
        f.write("  - fig3_boxplots.png\n")
        f.write("  - fig4_correlation.png\n")
        f.write("  - fig5_annual_cycle.png\n")
        f.write("  - fig6_scatter_plots.png\n")
        f.write("  - fig7_annual_trend.png\n")
        f.write("  - fig8_seasonality.png\n")
        f.write("  - summary_EDA.txt\n")

    print("✓ Summary saved in: summary_EDA.txt")

In [10]:
# ============================================================================
# MAIN FUNCTION
# ============================================================================

def main(filepath=None):
    """
    Run the complete exploratory analysis, step by step.

    Parameters:
        filepath: path of the tab-delimited station file. When omitted,
            the original hard-coded location is used.

    Any exception raised by a step is caught here, reported with its
    traceback, and not re-raised. Returns None.
    """
    print("\n")
    print("█" * 80)
    print("█" + " " * 78 + "█")
    print("█" + "  EXPLORATORY DATA ANALYSIS - HYDROLOGICAL STATION Hyd35117010  ".center(78) + "█")
    print("█" + " " * 78 + "█")
    print("█" * 80)
    print("\n")

    # Input file path (default kept for backward compatibility)
    if filepath is None:
        filepath = '/Users/andresgomezlatorre/TRAINING/LSTM_30/data/Hyd35117010_input.txt'

    try:
        # 1. Load data
        df = data_load(filepath)

        # 2. Descriptive statistics
        additional_stats = descriptive_statistics(df)

        # 3. Analysis of missing data
        missing_data_analyst(df)

        # 4. Temporal analysis (returns df with the calendar columns added)
        df = temporal_analyst(df)

        # 5. Correlation analysis
        corr_pearson, corr_spearman = correlation_analyst(df)

        # 6. Create visualizations
        create_visualizations(df, corr_pearson)

        # 7. Seasonality analysis
        seasonality_analysis(df)

        # 8. Summary
        generate_summary(df, additional_stats)

        print("\n" + "="*80)
        print("✓ ANALYSIS SUCCESSFULLY COMPLETED")
        print("="*80)
        print("\nAll results have been saved in the current directory.")
        print("Review the 8 figures generated and the summary_EDA.txt file.")

    except Exception as e:
        # Top-level boundary: report the failure with its traceback instead
        # of letting it propagate out of the notebook cell
        print(f"\n❌ ERROR: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()



████████████████████████████████████████████████████████████████████████████████
█                                                                              █
█         EXPLORATORY DATA ANALYSIS - HYDROLOGICAL STATION Hyd35117010         █
█                                                                              █
████████████████████████████████████████████████████████████████████████████████


1. LOAD DATA

✓ Data successfully uploaded
  - Total records: 5753
  - Period: 2000-04-01 a 2015-12-31
  - Duration: 5752 days

2. DESCRIPTIVE STATISTICS

--- Basic Statistics ---
       Precip_mm  Temp_min_C  Temp_max_C  Flow_m3s
count    5753.00     5753.00     5753.00   5753.00
mean        7.41       18.17       25.56   1576.05
std        10.70        0.87        1.70   1038.28
min         0.00       13.24       19.69    117.00
25%         0.11       17.63       24.40    576.80
50%         2.46       18.21       25.61   1502.00
75%        10.88       18.77       26.70   2339.00
max