## pips and includes

In [4]:
import pandas as pd
import requests
import json
from datetime import datetime
import os

## Constants

In [5]:
DATA_DIRECTORY = "../data/"

## Analysis

### missing data analysis 

In [9]:
def calculate_missing_percentage(csv_path):
    df = pd.read_csv(csv_path, na_values=['None', 'null', '-', '', ' ', 'NaN', 'nan', 'NAN'], low_memory=False)
    missing_percentages = df.isnull().mean() * 100
    return missing_percentages

def process_missing_data(data_directory):
    missing_data = {}
    for filename in os.listdir(data_directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(data_directory, filename)
            station_name = filename.replace('_data_2000_2023.csv', '')
            missing_data[station_name] = calculate_missing_percentage(file_path)
    
    missing_data_df = pd.DataFrame(missing_data).transpose()
    missing_data_df.fillna(100, inplace=True)
    missing_data_df['Average Percentage'] = missing_data_df.mean(axis=1)
    
    columns_to_exclude = ['BP (hPa)', 'Date Time', 'DiffR (w/m^2)', 'Grad (w/m^2)', 'NIP']
    columns_to_include = [col for col in missing_data_df.columns if col not in columns_to_exclude]
    
    missing_data_df['Average Percentage Excluding Specific Columns'] = missing_data_df[columns_to_include].mean(axis=1)
    missing_data_df = missing_data_df.sort_values(by='Average Percentage Excluding Specific Columns')
    missing_data_df = missing_data_df.round(1)
    
    return missing_data_df

missing_data_df = process_missing_data(DATA_DIRECTORY)


In [10]:
missing_data_df

Unnamed: 0,BP (hPa),Date Time,DiffR (w/m^2),Grad (w/m^2),NIP (w/m^2),RH (%),Rain,STDwd (deg),TD (degC),TDmax (degC),TDmin (degC),Time,WD (deg),WDmax (deg),WS (m/s),WSmax (m/s),Ws10mm (m/s),Ws1mm (m/s),Average Percentage,Average Percentage Excluding Specific Columns
Dafna,100.0,0.0,100.0,100.0,100.0,0.1,0.0,0.0,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.3,8.2
Kefar Blum,100.0,0.0,100.0,100.0,100.0,1.0,0.0,0.1,0.2,0.2,0.2,0.0,0.2,0.2,0.1,0.1,0.1,0.1,22.3,8.3
Eshhar,100.0,0.0,100.0,100.0,100.0,0.5,0.7,0.1,0.3,0.3,0.3,0.0,0.2,0.2,0.2,0.2,0.1,0.2,22.4,8.4
Shave Ziyyon,100.0,0.0,100.0,100.0,100.0,2.7,0.5,0.4,0.3,0.3,0.3,0.2,0.4,0.4,0.3,0.3,0.3,0.3,22.6,8.6
Merom Golan Picman,100.0,0.0,100.0,4.4,100.0,6.5,3.0,0.7,0.0,0.1,0.1,0.4,1.1,1.2,0.5,0.5,0.6,0.5,17.8,8.9
Gamla,100.0,0.0,100.0,74.7,100.0,1.0,0.4,0.3,0.1,0.1,0.1,5.7,0.5,0.5,0.5,0.5,5.9,0.2,21.7,9.1
Kefar Giladi,100.0,0.0,100.0,100.0,100.0,0.4,2.4,1.7,0.1,0.1,0.1,0.1,1.8,1.8,1.6,1.6,1.4,1.4,23.0,9.2
Kefar Nahum,100.0,0.0,100.0,100.0,100.0,1.7,0.0,0.0,2.2,2.1,2.2,3.6,0.0,0.0,0.0,0.0,3.6,0.0,23.1,9.2
Avne Etan,100.0,0.0,100.0,100.0,100.0,3.0,0.3,0.6,0.1,0.1,0.1,2.3,0.9,0.9,0.6,0.7,3.1,3.1,23.1,9.2
Elon,100.0,0.0,100.0,100.0,100.0,10.1,7.9,0.4,0.4,0.4,0.4,0.3,0.4,0.5,0.4,0.4,0.4,0.4,23.5,9.7
