In [2]:
pip install xlsxwriter

Collecting xlsxwriter
  Downloading XlsxWriter-3.1.2-py3-none-any.whl (153 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.0/153.0 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.1.2


In [1]:
# Prompt for a file upload through Colab. The workbook that main() opens by
# name must be uploaded here so it exists in the working directory.
from google.colab import files
# NOTE(review): `uploaded` is not referenced again in the visible cells;
# presumably it maps uploaded file names to their contents - confirm.
uploaded = files.upload()

Saving CO_U and CO_C Wells with Ra data DNR merged -S&G in MCOAS.xlsx to CO_U and CO_C Wells with Ra data DNR merged -S&G in MCOAS.xlsx


In [20]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import openpyxl

def read_excel_file(file_path, sheet_name):
    """Load one worksheet and reduce its 'Sample Date' column to the year.

    Parameters
    ----------
    file_path : path of the Excel workbook, passed to ``pd.read_excel``.
    sheet_name : worksheet to read, passed to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The worksheet contents, with 'Sample Date' overwritten by the year
        of each parsed date (the full date is not kept).
    """
    sheet = pd.read_excel(file_path, sheet_name=sheet_name)
    # Parse to datetimes and drop any timezone before taking the year.
    naive_dates = pd.to_datetime(sheet['Sample Date']).dt.tz_localize(None)
    sheet['Sample Date'] = naive_dates.dt.year
    return sheet

def wells_sampled(df):
    """Summarise 'Measured A' per well and sample year, split by sample count.

    Rows are grouped on ('WI_UNIQUE_', 'Sample Date'); for each group the
    number of non-null 'Measured A' values and their maximum are computed.

    Parameters
    ----------
    df : pandas.DataFrame with 'WI_UNIQUE_', 'Sample Date' and 'Measured A'
        columns.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Groups with exactly one measurement, then groups with more than one.
        Both frames have the columns 'Well_ID', 'Sample Date' and
        'Max_Concentration'. Groups with no non-null measurement appear in
        neither frame.
    """
    per_well_year = (
        df.groupby(['WI_UNIQUE_', 'Sample Date'])
        .agg(Count=('Measured A', 'count'),
             Max_Concentration=('Measured A', 'max'))
        .reset_index()
        .rename(columns={'WI_UNIQUE_': 'Well_ID'})
    )

    kept_columns = ['Well_ID', 'Sample Date', 'Max_Concentration']
    sampled_once_mask = per_well_year['Count'] == 1
    sampled_repeatedly_mask = per_well_year['Count'] > 1

    return (
        per_well_year.loc[sampled_once_mask, kept_columns],
        per_well_year.loc[sampled_repeatedly_mask, kept_columns],
    )

def main(file_path="CO_U and CO_C Wells with Ra data DNR merged -S&G in MCOAS.xlsx",
         sheet_name="CO_U",
         output_sheet_name="Updated_Wells_final"):
    """Attach per-well/year summaries to the sample rows and save them as a sheet.

    Reads ``sheet_name`` from ``file_path``, computes the per-(well, year)
    summary via ``wells_sampled``, inner-joins it back onto the sample rows,
    and writes the result to ``output_sheet_name`` in the same workbook,
    replacing that sheet if it already exists.

    Parameters
    ----------
    file_path : workbook to read from and write back to.
    sheet_name : worksheet holding the raw sample rows.
    output_sheet_name : worksheet that receives the merged rows.

    Returns
    -------
    None
    """
    df = read_excel_file(file_path, sheet_name)
    # Preview only: printing the whole sheet floods the notebook output.
    print(df.head())

    wells_sampled_once, wells_sampled_multiple = wells_sampled(df)

    combined_wells = pd.concat([wells_sampled_once, wells_sampled_multiple])
    print(len(combined_wells))
    print(combined_wells.head())

    # Merge the per-well/year summary back onto the original rows.
    merged_data = pd.merge(
        df,
        combined_wells,
        how='inner',
        left_on=['WI_UNIQUE_', 'Sample Date'],
        right_on=['Well_ID', 'Sample Date'],
    )

    # No timezone clean-up is needed here: read_excel_file has already reduced
    # 'Sample Date' to plain year numbers, and calling
    # dt.replace(tzinfo=None) on those numbers raised instead of converting.

    # Append to the existing workbook rather than opening it in write mode,
    # which would truncate the file and lose every other sheet if anything
    # failed before saving. The context manager saves and closes the file;
    # if_sheet_exists='replace' swaps out a previous output sheet.
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a',
                        if_sheet_exists='replace') as writer:
        merged_data.to_excel(writer, sheet_name=output_sheet_name, index=False)

# Run the pipeline when this cell/script is executed directly rather than imported.
if __name__ == "__main__":
    main()


ValueError: ignored