In [None]:
# Import necessary libraries
import os
import pandas as pd

In [None]:
# Mount Google Drive into the Colab filesystem
from google.colab import drive

# Mount target; a forced remount is requested on every run of this cell
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point, force_remount=True)

In [None]:
# Change directory to ndvi folder
%cd drive/MyDrive/ndvi

In [None]:
# Collect the input CSV files of the current folder into a list and print it out.
# 'NDVI_Complete.csv' is written into this same folder by the export cell below;
# it must be skipped here, otherwise a second run of the notebook would pick it up
# as input and the year/quarter sort key would fail on int('Complete.csv').
ndvi_output_name = 'NDVI_Complete.csv'
ndvi_files_list = sorted(
    name
    for name in os.listdir()
    if name.endswith('.csv') and name != ndvi_output_name
)  # sorted only so the printed listing is stable; os.listdir() order is arbitrary

print("List of Files in NDVI Folder:")
print(*ndvi_files_list, sep="\n")

In [None]:
# Put the filenames in chronological order. The key reads the 2nd and 3rd
# '_'-separated fields of each name: the 2nd is converted to int (year), the 3rd
# (quarter, compared as plain text) breaks ties within a year.
ndvi_files_sorted = list(ndvi_files_list)
ndvi_files_sorted.sort(key=lambda name: (int(name.split('_')[1]), name.split('_')[2]))

# Load every file in that order and stack the rows under one fresh index
ndvi_dfs = list(map(pd.read_csv, ndvi_files_sorted))
ndvi_combined_df = pd.concat(ndvi_dfs, ignore_index=True)

display(ndvi_combined_df)

In [None]:
# Drop the "system:index" and ".geo" columns, which are treated as irrelevant here
ndvi_unused_columns = ["system:index", ".geo"]
ndvi_dropCol_df = ndvi_combined_df.drop(columns=ndvi_unused_columns)
display(ndvi_dropCol_df)

In [None]:
# Show a True/False mask marking every missing cell in the table
ndvi_null_mask = ndvi_dropCol_df.isna()
ndvi_null_mask

In [None]:
# Count the missing values in each column
ndvi_null_counts = ndvi_dropCol_df.isna().sum()
ndvi_null_counts

In [None]:
# Fill missing 'ndvi' values using linear interpolation along the row order.
# limit_direction="both" also fills gaps at the very start of the column; with the
# default (forward only) leading NaNs stay in place and would end up in the export.
# NOTE(review): interpolation runs over consecutive rows of the concatenated table —
# confirm that adjacent rows are the right neighbours to interpolate between.
ndvi_dropCol_df['ndvi'] = ndvi_dropCol_df['ndvi'].interpolate(
    method="linear", limit_direction="both"
)

display(ndvi_dropCol_df)

In [None]:
# Re-count the missing values per column now that the fill step has run
ndvi_null_counts_after_fill = ndvi_dropCol_df.isna().sum()
ndvi_null_counts_after_fill

In [None]:
# Write the table to a CSV file in the current working directory (inside the
# mounted Google Drive), leaving out the DataFrame index
ndvi_export_path = 'NDVI_Complete.csv'
ndvi_dropCol_df.to_csv(ndvi_export_path, index=False)

In [None]:
# Report the folder the kernel is currently working in
cwd_label = "Current Directory:"
current_directory = os.getcwd()
print(cwd_label, current_directory)

In [None]:
# Step up to the parent of the current working directory (IPython %cd magic).
# Relative move: the result depends on where the notebook currently is, so
# running this cell more than once climbs one level further each time.
%cd ..

In [None]:
# Change directory to gnvi folder
%cd gndvi

In [None]:
# Collect the input CSV files of the current folder into a list and print it out.
# 'GNDVI_Complete.csv' is written into this same folder by the export cell below;
# it must be skipped here, otherwise a second run of the notebook would pick it up
# as input and the year/quarter sort key would fail on int('Complete.csv').
gndvi_output_name = 'GNDVI_Complete.csv'
gndvi_files_list = sorted(
    name
    for name in os.listdir()
    if name.endswith('.csv') and name != gndvi_output_name
)  # sorted only so the printed listing is stable; os.listdir() order is arbitrary

print("List of Files in GNDVI Folder:")
print(*gndvi_files_list, sep="\n")

In [None]:
# Put the filenames in chronological order. The key reads the 2nd and 3rd
# '_'-separated fields of each name: the 2nd is converted to int (year), the 3rd
# (quarter, compared as plain text) breaks ties within a year.
gndvi_files_sorted = list(gndvi_files_list)
gndvi_files_sorted.sort(key=lambda name: (int(name.split('_')[1]), name.split('_')[2]))

# Load every file in that order and stack the rows under one fresh index
gndvi_dfs = list(map(pd.read_csv, gndvi_files_sorted))
gndvi_combined_df = pd.concat(gndvi_dfs, ignore_index=True)

display(gndvi_combined_df)

In [None]:
# Drop the "system:index" and ".geo" columns, which are treated as irrelevant here
gndvi_unused_columns = ["system:index", ".geo"]
gndvi_dropCol_df = gndvi_combined_df.drop(columns=gndvi_unused_columns)
display(gndvi_dropCol_df)

In [None]:
# Show a True/False mask marking every missing cell in the table
gndvi_null_mask = gndvi_dropCol_df.isna()
gndvi_null_mask

In [None]:
# Count the missing values in each column
gndvi_null_counts = gndvi_dropCol_df.isna().sum()
gndvi_null_counts

In [None]:
# Fill missing 'gndvi' values using linear interpolation along the row order.
# limit_direction="both" also fills gaps at the very start of the column; with the
# default (forward only) leading NaNs stay in place and would end up in the export.
# NOTE(review): interpolation runs over consecutive rows of the concatenated table —
# confirm that adjacent rows are the right neighbours to interpolate between.
gndvi_dropCol_df['gndvi'] = gndvi_dropCol_df['gndvi'].interpolate(
    method="linear", limit_direction="both"
)

display(gndvi_dropCol_df)

In [None]:
# Re-count the missing values per column now that the fill step has run
gndvi_null_counts_after_fill = gndvi_dropCol_df.isna().sum()
gndvi_null_counts_after_fill

In [None]:
# Write the table to a CSV file in the current working directory (inside the
# mounted Google Drive), leaving out the DataFrame index
gndvi_export_path = 'GNDVI_Complete.csv'
gndvi_dropCol_df.to_csv(gndvi_export_path, index=False)

In [None]:
# Report the folder the kernel is currently working in
cwd_label = "Current Directory:"
current_directory = os.getcwd()
print(cwd_label, current_directory)

In [None]:
# Step up to the parent of the current working directory (IPython %cd magic).
# Relative move: the result depends on where the notebook currently is, so
# running this cell more than once climbs one level further each time.
%cd ..

In [None]:
# Change directory to no2 folder
%cd no2

In [None]:
# Collect the input CSV files of the current folder into a list and print it out.
# 'NO2_Complete.csv' is written into this same folder by the export cell below;
# it must be skipped here, otherwise a second run of the notebook would pick it up
# as input and the year/quarter sort key would fail on int('Complete.csv').
no2_output_name = 'NO2_Complete.csv'
no2_files_list = sorted(
    name
    for name in os.listdir()
    if name.endswith('.csv') and name != no2_output_name
)  # sorted only so the printed listing is stable; os.listdir() order is arbitrary

print("List of Files in NO2 Folder:")
print(*no2_files_list, sep="\n")

In [None]:
# Put the filenames in chronological order. The key reads the 2nd and 3rd
# '_'-separated fields of each name: the 2nd is converted to int (year), the 3rd
# (quarter, compared as plain text) breaks ties within a year.
no2_files_sorted = list(no2_files_list)
no2_files_sorted.sort(key=lambda name: (int(name.split('_')[1]), name.split('_')[2]))

# Load every file in that order and stack the rows under one fresh index
no2_dfs = list(map(pd.read_csv, no2_files_sorted))
no2_combined_df = pd.concat(no2_dfs, ignore_index=True)

display(no2_combined_df)

In [None]:
# Drop the "system:index" and ".geo" columns, which are treated as irrelevant here
no2_unused_columns = ["system:index", ".geo"]
no2_dropCol_df = no2_combined_df.drop(columns=no2_unused_columns)
display(no2_dropCol_df)

In [None]:
# Write the table to a CSV file in the current working directory (inside the
# mounted Google Drive), leaving out the DataFrame index
no2_export_path = 'NO2_Complete.csv'
no2_dropCol_df.to_csv(no2_export_path, index=False)

In [None]:
# Report the folder the kernel is currently working in
cwd_label = "Current Directory:"
current_directory = os.getcwd()
print(cwd_label, current_directory)

In [None]:
# Step up to the parent of the current working directory (IPython %cd magic).
# Relative move: the result depends on where the notebook currently is, so
# running this cell more than once climbs one level further each time.
%cd ..

In [None]:
# Change directory to soil moisture folder
%cd soil_moisture

In [None]:
# Collect the input CSV files of the current folder into a list and print it out.
# 'Soil_Moisture_Complete.csv' is written into this same folder by the export cell
# below; it must be skipped here, otherwise a second run of the notebook would pick
# it up as input and the year/quarter sort key would fail on int('Moisture').
soil_moisture_output_name = "Soil_Moisture_Complete.csv"
soil_moisture_files_list = sorted(
    name
    for name in os.listdir()
    if name.endswith('.csv') and name != soil_moisture_output_name
)  # sorted only so the printed listing is stable; os.listdir() order is arbitrary

print("List of Files in Soil Moisture Folder:")
print(*soil_moisture_files_list, sep="\n")

In [None]:
# Put the filenames in chronological order. The key reads the 2nd and 3rd
# '_'-separated fields of each name: the 2nd is converted to int (year), the 3rd
# (quarter, compared as plain text) breaks ties within a year.
# NOTE(review): this assumes the part of the name before the year contains no '_';
# a name such as 'soil_moisture_2020_Q1.csv' would fail on int('moisture') —
# confirm the actual file naming in this folder.
soil_moisture_files_sorted = list(soil_moisture_files_list)
soil_moisture_files_sorted.sort(key=lambda name: (int(name.split('_')[1]), name.split('_')[2]))

# Load every file in that order and stack the rows under one fresh index
soil_moisture_dfs = list(map(pd.read_csv, soil_moisture_files_sorted))
soil_moisture_combined_df = pd.concat(soil_moisture_dfs, ignore_index=True)

display(soil_moisture_combined_df)

In [None]:
# Drop the "system:index" and ".geo" columns, which are treated as irrelevant here
soil_moisture_unused_columns = ["system:index", ".geo"]
soil_moisture_dropCol_df = soil_moisture_combined_df.drop(columns=soil_moisture_unused_columns)
display(soil_moisture_dropCol_df)

In [None]:
# Write the table to a CSV file in the current working directory (inside the
# mounted Google Drive), leaving out the DataFrame index
soil_moisture_export_path = "Soil_Moisture_Complete.csv"
soil_moisture_dropCol_df.to_csv(soil_moisture_export_path, index=False)

In [None]:
# Report the folder the kernel is currently working in
cwd_label = "Current Directory:"
current_directory = os.getcwd()
print(cwd_label, current_directory)

In [None]:
# Step up to the parent of the current working directory (IPython %cd magic).
# Relative move: the result depends on where the notebook currently is, so
# running this cell more than once climbs one level further each time.
%cd ..

In [None]:
# Change directory to soil temperature folder
%cd soil_temp

In [None]:
# Collect the input CSV files of the current folder into a list and print it out.
# 'Soil_Temp_Complete.csv' is written into this same folder by the export cell
# below; it must be skipped here, otherwise a second run of the notebook would pick
# it up as input and the year/quarter sort key would fail on int('Temp').
soil_temp_output_name = "Soil_Temp_Complete.csv"
soil_temp_files_list = sorted(
    name
    for name in os.listdir()
    if name.endswith('.csv') and name != soil_temp_output_name
)  # sorted only so the printed listing is stable; os.listdir() order is arbitrary

print("List of Files in Soil Temperature Folder:")
print(*soil_temp_files_list, sep="\n")

In [None]:
# Put the filenames in chronological order. The key reads the 2nd and 3rd
# '_'-separated fields of each name: the 2nd is converted to int (year), the 3rd
# (quarter, compared as plain text) breaks ties within a year.
# NOTE(review): this assumes the part of the name before the year contains no '_';
# a name such as 'soil_temp_2020_Q1.csv' would fail on int('temp') —
# confirm the actual file naming in this folder.
soil_temp_files_sorted = list(soil_temp_files_list)
soil_temp_files_sorted.sort(key=lambda name: (int(name.split('_')[1]), name.split('_')[2]))

# Load every file in that order and stack the rows under one fresh index
soil_temp_dfs = list(map(pd.read_csv, soil_temp_files_sorted))
soil_temp_combined_df = pd.concat(soil_temp_dfs, ignore_index=True)

display(soil_temp_combined_df)

In [None]:
# Drop the "system:index" and ".geo" columns, which are treated as irrelevant here
soil_temp_unused_columns = ["system:index", ".geo"]
soil_temp_dropCol_df = soil_temp_combined_df.drop(columns=soil_temp_unused_columns)
display(soil_temp_dropCol_df)

In [None]:
# Write the table to a CSV file in the current working directory (inside the
# mounted Google Drive), leaving out the DataFrame index
soil_temp_export_path = "Soil_Temp_Complete.csv"
soil_temp_dropCol_df.to_csv(soil_temp_export_path, index=False)