## Merge Data from multiple csv files to one file

Since the portal gives you separate files for the location, the site information, and the water level for each download instance (which cannot be greater than 100), we need to merge the files into one for the analysis. Before that, we need to make all the files use the same delimiter; otherwise they don't merge.

Make sure the folder contains only the files you need to merge, and remove unnecessary files once the code has run.

In [8]:
import os
import pandas as pd


def identify_delimiter(file_path: str) -> str:
    """Guess the field delimiter of a CSV file from its first line.

    Commas, tabs and semicolons in the first line are counted and the most
    frequent one wins. A semicolon-separated header that merely contains a
    comma inside one of its column names is therefore not mistaken for a
    comma-separated file.

    Args:
        file_path: The path of the CSV file.

    Returns:
        A comma, a tab character or a semicolon. Equal counts are resolved in
        that order, and the semicolon is the default when none of the three
        characters occurs in the first line (including for an empty file).
    """
    # All candidates are ASCII, so undecodable bytes are replaced instead of
    # being allowed to abort the detection.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        first_line = f.readline()

    candidates = (',', '\t', ';')
    counts = {sep: first_line.count(sep) for sep in candidates}

    # max() returns the first of several equal maxima, which keeps the
    # comma > tab > semicolon priority for ties.
    best = max(candidates, key=counts.get)

    # If no common delimiter is found, assume semicolon as the default
    return best if counts[best] > 0 else ';'


def change_delimiter(input_file: str, output_file: str, delimiter: str) -> None:
    """Read a CSV file with a given delimiter and save it with ';' as delimiter.

    Every field is read as text and no value is converted to a missing value,
    so cell contents keep their original spelling: leading zeros in
    identifiers, trailing zeros in decimals and literal strings such as "NA"
    are written back unchanged. Only the separator differs.

    Args:
        input_file: The path of the input CSV file.
        output_file: The path of the output CSV file.
        delimiter: The delimiter used in the input file.
    """
    # dtype=str switches off numeric type inference; keep_default_na=False stops
    # pandas from turning strings such as "NA" or empty fields into NaN.
    df = pd.read_csv(
        input_file,
        delimiter=delimiter,
        dtype=str,
        keep_default_na=False,
    )
    df.to_csv(output_file, index=False, sep=';')


def process_csv_files() -> None:
    """Save a ';'-separated copy of every CSV file in the current working directory.

    Each ``<name>.csv`` is read with its detected delimiter and written next to
    the original as ``m_<name>.csv``. A file that is itself the ``m_`` copy of
    another CSV still present in the folder is skipped, so running this again
    does not produce ``m_m_<name>.csv`` files.
    """
    root_folder = os.getcwd()  # Get the current working directory as the root folder

    # Collect the names once so the skip test below is a set lookup.
    csv_names = {name for name in os.listdir(root_folder) if name.endswith('.csv')}

    for file in sorted(csv_names):
        # Output of an earlier run: "m_x.csv" while "x.csv" is still in the folder.
        if file.startswith('m_') and file[2:] in csv_names:
            continue

        input_file_path = os.path.join(root_folder, file)
        output_file_path = os.path.join(root_folder, f"m_{file}")

        delimiter = identify_delimiter(input_file_path)
        change_delimiter(input_file_path, output_file_path, delimiter)


# Run the conversion only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    process_csv_files()


In [None]:


import os
import pandas as pd

def identify_delimiter(file_path: str) -> str:
    """Return the delimiter that occurs most often in the first line of a CSV file.

    Counting occurrences, rather than returning the first candidate that is
    present at all, keeps a semicolon-separated header with a comma inside a
    column name from being read as comma-separated.

    Args:
        file_path: The path of the CSV file.

    Returns:
        A comma, a tab character or a semicolon. Equal counts are resolved in
        that order; the semicolon is returned when the first line contains
        none of the three characters.
    """
    # The delimiters are ASCII, so bytes that cannot be decoded are replaced
    # rather than raising.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        first_line = f.readline()

    # Checked in priority order: only a strictly higher count replaces the pick.
    chosen = ';'
    chosen_count = 0
    for sep in (',', '\t', ';'):
        occurrences = first_line.count(sep)
        if occurrences > chosen_count:
            chosen, chosen_count = sep, occurrences

    # If no common delimiter is found, the semicolon default is still in place.
    return chosen

def process_csv_files():
    """Merge all CSV files in the current working directory into one CSV file.

    Every ``.csv`` file in the folder is read with its detected delimiter, the
    resulting DataFrames are concatenated and the result is saved as
    'Master High Plains.csv' in the current working directory. Files are read
    in sorted name order so the row order of the result is reproducible.

    Raises:
        ValueError: If the folder contains no CSV file to merge.
    """
    output_name = 'Master High Plains.csv'
    folder = os.getcwd()

    files_to_merge = []
    for file in sorted(os.listdir(folder)):
        # The master file is written into this same folder; reading it back on
        # a second run would merge the previous result into itself.
        if not file.endswith('.csv') or file == output_name:
            continue

        input_file_path = os.path.join(folder, file)
        delimiter = identify_delimiter(input_file_path)
        files_to_merge.append(pd.read_csv(input_file_path, delimiter=delimiter))

    # pd.concat raises an unspecific error on an empty list; say what is wrong.
    if not files_to_merge:
        raise ValueError(f"No CSV files to merge were found in {folder}")

    master_df = pd.concat(files_to_merge, ignore_index=True)

    master_df.to_csv(output_name, index=False)

# Run the merge as soon as this cell is executed.
process_csv_files()


## Format Columns

You might have data that is unwanted, so you need to drop a few columns for the analysis and create a new file.

# Merge the Location and Water Level File 

In [None]:
import pandas as pd

# Load the first CSV file. low_memory=False makes pandas infer each column's
# type from the whole file instead of chunk by chunk.
data1 = pd.read_csv('Water Lvl Master - copy.csv', low_memory=False)

# Load the second CSV file
data2 = pd.read_csv('Site Master.csv', low_memory=False)

# Keep a single coordinate row per SiteNo. A left merge repeats a left row once
# for every matching right row, so duplicate SiteNo entries in the second file
# would otherwise multiply the rows of the first file.
site_coords = data2[['SiteNo', 'DecLatVa', 'DecLongVa']].drop_duplicates(subset='SiteNo')

# Perform the merge based on the common unique ID column
merged_data = pd.merge(data1, site_coords, on='SiteNo', how='left')

# Save the merged data to a new CSV file
merged_data.to_csv('merged_output.csv', index=False)