# Find delimiters and convert them to a common delimiter

In [None]:
import os
import pandas as pd


def identify_delimiter(file_path: str) -> str:
    """Guess the field delimiter of a CSV file from its first line.

    The candidate that occurs most often in the first line wins, so a
    semicolon- or tab-separated header that merely contains a comma inside
    one of its fields is not mistaken for a comma-separated file.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        A comma, a tab character or a semicolon. Ties are resolved in that
        order, and a semicolon is returned when none of the candidates
        occurs in the first line (including for an empty file).
    """
    with open(file_path, 'r') as f:
        first_line = f.readline()

    # Listed in priority order: max() keeps the first candidate on a tie.
    candidates = (',', '\t', ';')
    best = max(candidates, key=first_line.count)

    # No candidate present at all: assume semicolon as the default.
    return best if first_line.count(best) else ';'


def change_delimiter(input_file: str, output_file: str, delimiter: str) -> None:
    """Rewrite a delimited text file using ';' as the field separator.

    Every column is read as plain text and no value is interpreted as
    missing, so the cell contents are written back unchanged: identifiers
    keep their leading zeros, numbers keep their original formatting and
    literal strings such as "NA" are not blanked out.

    Args:
        input_file: Path of the input CSV file.
        output_file: Path of the output CSV file (overwritten if it exists).
        delimiter: The delimiter used in the input file.
    """
    # dtype=str + keep_default_na=False disables pandas' type/NA inference,
    # which would otherwise alter values during a pure delimiter conversion.
    df = pd.read_csv(
        input_file,
        delimiter=delimiter,
        dtype=str,
        keep_default_na=False,
    )
    df.to_csv(output_file, index=False, sep=';')


def process_csv_files() -> None:
    """Convert every CSV file in the current working directory to ';'-delimited.

    Each ``<name>.csv`` found in the folder is written back to the same
    folder as ``m_<name>.csv``. Files that already carry the ``m_`` prefix
    are treated as output of a previous run and skipped, so running the
    function again does not pile up ``m_m_<name>.csv`` copies.
    """
    root_folder = os.getcwd()  # The current working directory is the root folder
    output_prefix = "m_"

    # sorted() makes the processing order independent of the file system.
    for file in sorted(os.listdir(root_folder)):
        if not file.endswith('.csv') or file.startswith(output_prefix):
            continue

        input_file_path = os.path.join(root_folder, file)
        output_file_path = os.path.join(root_folder, f"{output_prefix}{file}")

        delimiter = identify_delimiter(input_file_path)
        change_delimiter(input_file_path, output_file_path, delimiter)


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    process_csv_files()


# Merge Files

In [None]:
import os
import pandas as pd

def identify_delimiter(file_path: str) -> str:
    """Guess the field delimiter of a CSV file from its first line.

    The candidate that occurs most often in the first line wins, so a
    semicolon- or tab-separated header that merely contains a comma inside
    one of its fields is not mistaken for a comma-separated file.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        A comma, a tab character or a semicolon. Ties are resolved in that
        order, and a semicolon is returned when none of the candidates
        occurs in the first line (including for an empty file).
    """
    with open(file_path, 'r') as f:
        first_line = f.readline()

    # Listed in priority order: max() keeps the first candidate on a tie.
    candidates = (',', '\t', ';')
    best = max(candidates, key=first_line.count)

    # No candidate present at all: assume semicolon as the default.
    return best if first_line.count(best) else ';'

def process_csv_files():
    """Merge every CSV file in the current working directory into one file.

    Each CSV is read with its detected delimiter, the frames are stacked
    row-wise, and the result is saved as 'Master High Plains.csv' in the
    current working directory.

    The output file itself is excluded from the inputs, so running the
    function a second time does not merge the previous master file back
    into the new one.

    Raises:
        ValueError: If the folder contains no CSV files to merge.
    """
    output_name = 'Master High Plains.csv'
    root_folder = os.getcwd()

    files_to_merge = []
    # sorted() makes the row order of the master file reproducible.
    for file in sorted(os.listdir(root_folder)):
        if not file.endswith('.csv') or file == output_name:
            continue
        input_file_path = os.path.join(root_folder, file)
        delimiter = identify_delimiter(input_file_path)
        files_to_merge.append(pd.read_csv(input_file_path, delimiter=delimiter))

    # pd.concat raises an opaque ValueError on an empty list; say what is wrong.
    if not files_to_merge:
        raise ValueError(f"No CSV files to merge were found in {root_folder!r}")

    master_df = pd.concat(files_to_merge, ignore_index=True)

    master_df.to_csv(output_name, index=False)

# Run the merge immediately; this call is not behind a __main__ guard.
process_csv_files()


# Merge the Location and Water Level File 

Change the delimiter from ; to ,

In [11]:
import csv

def change_delimiter(input_file, output_file, input_delimiter=';', output_delimiter=','):
    """Copy a delimited text file, replacing its field delimiter.

    Rows are parsed and re-emitted with the ``csv`` module, so fields that
    contain the new delimiter are quoted in the output.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write (overwritten if it exists).
        input_delimiter: Delimiter used in ``input_file``.
        output_delimiter: Delimiter to use in ``output_file``.
    """
    # Both handles use newline='' as the csv module requires; without it on
    # the input side, line breaks embedded in quoted fields are rewritten.
    with open(input_file, 'r', newline='') as file_in, \
            open(output_file, 'w', newline='') as file_out:
        reader = csv.reader(file_in, delimiter=input_delimiter)
        writer = csv.writer(file_out, delimiter=output_delimiter)
        writer.writerows(reader)

# Convert the semicolon-delimited water level master file
# into a comma-delimited copy.
input_file = 'Water Level Master.csv'
output_file = 'Water Level Master_D.csv'
change_delimiter(
    input_file,
    output_file,
    input_delimiter=';',
    output_delimiter=',',
)

print("Delimiter changed successfully!")

Delimiter changed successfully!


Rename the first column so that both files use the same column name

In [None]:
import pandas as pd

# File whose first column header is renamed; it is overwritten in place.
file_path = 'Water Level Master_D.csv'
new_name = 'Site'

# Load the whole table.
# NOTE(review): the values are round-tripped through pandas' type inference
# here; if the first column holds IDs with leading zeros they may not survive
# this step. Confirm against the data.
df = pd.read_csv(file_path)

# Whatever the first header currently is, replace it with `new_name`.
df = df.rename(columns={df.columns[0]: new_name})

# Write the table back over the original file, without the index column.
df.to_csv(file_path, index=False)


In [19]:
import pandas as pd

# Input tables to be joined on their shared 'Site' column.
file1_path = 'LM_D.csv'
file2_path = 'Water Level Master_D.csv'

# Load both tables with 'Site' forced to object dtype, which avoids the
# DtypeWarning and gives the join key the same dtype on both sides.
df1, df2 = (
    pd.read_csv(path, dtype={'Site': object})
    for path in (file1_path, file2_path)
)

# Join on 'Site'; with no `how` given, pandas performs an inner join.
merged_df = df1.merge(df2, on='Site')

# Persist the joined table without the index column.
merged_df.to_csv('merged_file.csv', index=False)


In [27]:
import pandas as pd

# Load the location table in a single pass (low_memory=False).
data = pd.read_csv('LM_D.csv', low_memory=False)

# Count distinct values in 'DecLatVa'; dropna=False counts missing values as
# one distinct value, exactly like len(Series.unique()).
# NOTE(review): 'DecLatVa' looks like a latitude column rather than a site
# identifier, so this may not equal the number of sites. Confirm.
unique_sites = data['DecLatVa'].nunique(dropna=False)

print("Number of unique sites:", unique_sites)


Number of unique sites: 3609
