# Combining Our Local Data Sources

## Combining `.tif` File Data

In [24]:
from rasterio.plot import plotting_extent
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import rasterio

# Toggle to run the .tif --> pd df --> .csv conversion.
# Every conversion cell below is wrapped in `if run_convert:`, so while this
# stays False those cells are skipped; set it to True to run them.
run_convert = False

In [25]:
# gsa: turn each of the twelve monthly PVOUT rasters into a DataFrame with one
# row per valid pixel (latitude, longitude, pvo). Results are keyed by month
# number (1-12) in `pvo_datasets`; the paths that were read are kept in
# `tif_files` under the same keys.
if run_convert:
    tif_files = {}
    pvo_datasets = {}
    for i in range(1, 13):
        # File names carry a two-digit, zero-padded month (PVOUT_01 ... PVOUT_12)
        tif_file_wst = f"../../Data/gsa/westhem-monthly/PVOUT_{i:02d}.tif"
        tif_files[i] = tif_file_wst

        # Reading the .tif file; everything needed is copied out of the
        # dataset so the handle can be closed before the heavy work starts.
        with rasterio.open(tif_file_wst) as ds:
            pvo_data = ds.read(1)  # Reading the first band
            t = ds.transform
            nodata = ds.nodata

        # Mask of pixels holding a real value: not NaN and not the raster's
        # nodata sentinel (a dataset without a sentinel reports None).
        valid = ~np.isnan(pvo_data)
        if nodata is not None:
            valid &= pvo_data != nodata

        # np.nonzero walks the mask in row-major order, i.e. the same order a
        # nested `for row ... for col ...` loop would visit the pixels.
        rows, cols = np.nonzero(valid)

        # Converting pixel coords to geographic coords in one batched call
        # instead of one call per pixel. atleast_1d guards against versions
        # that hand back bare scalars when exactly one pixel is requested.
        longs, lats = rasterio.transform.xy(t, rows, cols)
        lats = np.atleast_1d(lats)
        longs = np.atleast_1d(longs)

        # .tolist() yields plain Python numbers, so pandas infers the column
        # dtype the same way it does for a list built value by value.
        pvo_vals = pvo_data[rows, cols].tolist()

        # Creating a DataFrame
        df = pd.DataFrame({
            "latitude": lats,
            "longitude": longs,
            "pvo": pvo_vals
        })
        pvo_datasets[i] = df

    # Displaying the dataframe for the first .tif file. A bare expression
    # nested inside `if` is never auto-rendered by the notebook, so the
    # preview has to go through display() explicitly.
    display(pvo_datasets[1].head())

In [26]:
# nrel: turn each of the twelve monthly GHI rasters into a DataFrame with one
# row per valid pixel (latitude, longitude, irradiance). Results are keyed by
# month number (1-12) in `irr_datasets`.
if run_convert:
    # One raster per calendar month, listed in month order.
    irradiance_files = [
        f"../../Data/nrel/monthly-ghi/ghi_{month_abbr}.tif"
        for month_abbr in ("jan", "feb", "mar", "apr", "may", "jun",
                           "jul", "aug", "sep", "oct", "nov", "dec")
    ]
    irr_datasets = {}
    for i, irradiance_file in enumerate(irradiance_files, start=1):
        # Copy what is needed out of the dataset so the handle can be closed
        # before the heavy work starts.
        with rasterio.open(irradiance_file) as ds:
            irr_data = ds.read(1)  # first band
            transform = ds.transform
            nodata = ds.nodata

        # Mask of pixels holding a real value: not NaN and not the raster's
        # nodata sentinel (a dataset without a sentinel reports None).
        valid = ~np.isnan(irr_data)
        if nodata is not None:
            valid &= irr_data != nodata

        # np.nonzero walks the mask in row-major order, i.e. the same order a
        # nested `for row ... for col ...` loop would visit the pixels.
        rows, cols = np.nonzero(valid)

        # Convert pixel coords to geographic coords in one batched call
        # instead of one call per pixel. atleast_1d guards against versions
        # that hand back bare scalars when exactly one pixel is requested.
        longs, lats = rasterio.transform.xy(transform, rows, cols)
        lats = np.atleast_1d(lats)
        longs = np.atleast_1d(longs)

        # .tolist() yields plain Python numbers, so pandas infers the column
        # dtype the same way it does for a list built value by value.
        irr_vals = irr_data[rows, cols].tolist()

        irr_df = pd.DataFrame({
            'latitude': lats,
            'longitude': longs,
            'irradiance': irr_vals
        })
        irr_datasets[i] = irr_df

    # Displaying the dataframe for the first .tif file. A bare expression
    # nested inside `if` is never auto-rendered by the notebook, so the
    # preview has to go through display() explicitly.
    display(irr_datasets[1].head())

In [27]:
# Combining the per-month .csv files into one full-year .csv per data source.
# Each monthly table gets a `month` column (1-12) before the tables are
# stacked, and the two stacked tables are written next to the monthly folders.
# NOTE(review): the per-month CSVs read here are not written by any cell
# visible in this part of the notebook - confirm they exist before enabling
# run_convert.
if run_convert:
    pvo_frames = []
    irr_frames = []
    for i in range(1, 13):
        df_pvo_curr = pd.read_csv(f"../../Data/csvs/months_pvo/pvout_m{i}.csv")
        df_irr_curr = pd.read_csv(f"../../Data/csvs/months_irr/irr_m{i}.csv")

        # Tag every row with the month it came from
        pvo_frames.append(df_pvo_curr.assign(month=i))
        irr_frames.append(df_irr_curr.assign(month=i))

    # Stack all twelve months in a single concat; concatenating inside the
    # loop would re-copy the growing table on every iteration.
    df_pvo_full = pd.concat(pvo_frames, ignore_index=True)
    df_irr_full = pd.concat(irr_frames, ignore_index=True)

    df_pvo_full.to_csv("../../Data/csvs/pvout_full.csv", index=False)
    df_irr_full.to_csv("../../Data/csvs/irr_full.csv", index=False)