# In [None]:
import pandas as pd
from google.oauth2.credentials import Credentials
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import gspread
import os

def connect_to_sheets(credentials_file):
    """
    Establish connection to Google Sheets and return the spreadsheet object.

    The spreadsheet to open is taken from the ``SPREADSHEET_URL`` environment
    variable.

    Args:
        credentials_file: Path to the service-account JSON key file.

    Returns:
        The spreadsheet object returned by ``gspread``'s ``open_by_url``.

    Raises:
        ValueError: If ``SPREADSHEET_URL`` is unset or empty.
    """
    # Validate configuration first so a missing variable fails fast with a
    # clear message instead of an obscure error from open_by_url(None).
    spreadsheet_url = os.getenv('SPREADSHEET_URL')
    if not spreadsheet_url:
        raise ValueError(
            "The SPREADSHEET_URL environment variable is not set; "
            "cannot determine which spreadsheet to open."
        )

    credentials = service_account.Credentials.from_service_account_file(
        credentials_file,
        scopes=['https://www.googleapis.com/auth/spreadsheets']
    )

    gc = gspread.authorize(credentials)
    return gc.open_by_url(spreadsheet_url)

def read_worksheet_to_df(spreadsheet, worksheet_name):
    """
    Read a worksheet and convert it to a pandas DataFrame.

    The first row returned by the worksheet is used as the column headers and
    every following row becomes a data row.

    Args:
        spreadsheet: Object exposing ``worksheet(name)``; the returned
            worksheet must expose ``get_all_values()``.
        worksheet_name: Name of the worksheet to read.

    Returns:
        A DataFrame of the worksheet contents. An empty DataFrame is returned
        when the worksheet yields no rows at all.
    """
    worksheet = spreadsheet.worksheet(worksheet_name)
    all_values = worksheet.get_all_values()

    # A completely empty worksheet has no header row to index into.
    if not all_values:
        return pd.DataFrame()

    headers, *data = all_values
    return pd.DataFrame(data, columns=headers)

def separate_opening_stock(df):
    """
    Split a DataFrame into regular rows and opening-stock rows.

    A row counts as opening stock when its ``purchasing_officer`` value
    contains the text "opening stock" (case-insensitive). Missing values are
    treated as not matching.

    Returns:
        A ``(main_df, opening_stock_df)`` tuple of independent copies.
    """
    officer = df['purchasing_officer']
    is_opening = officer.str.contains('opening stock', case=False, na=False)

    opening_rows = df.loc[is_opening].copy()
    remaining_rows = df.loc[~is_opening].copy()
    return remaining_rows, opening_rows

def standardize_dataframe(df):
    """
    Return a cleaned copy of the DataFrame with normalized names and values.

    Column names are lower-cased, stripped, and have spaces and hyphens
    replaced by underscores. Every column is converted to stripped,
    lower-cased text; columns whose values all parse as numbers (after
    removing commas) are then converted to numeric. The input is not modified.
    """
    cleaned = df.copy()

    # Normalize the column labels in one chained pass.
    cleaned.columns = (
        cleaned.columns
        .str.lower()
        .str.strip()
        .str.replace(' ', '_')
        .str.replace('-', '_')
    )

    for name in cleaned.columns:
        text = cleaned[name].astype('string').str.strip().str.lower()
        cleaned[name] = text

        # Best effort: keep the text version when any value is not numeric.
        try:
            cleaned[name] = pd.to_numeric(text.str.replace(',', ''), errors='raise')
        except (ValueError, TypeError):
            pass

    return cleaned

def standardize_dates(df):
    """
    Parse the ``date`` column and add derived month columns.

    Parsing tries ``'%d %b %Y'`` first, then ``'%d/%m/%y'``, and finally falls
    back to mixed-format, day-first parsing. On the returned copy, ``date`` is
    rewritten as ``YYYY-MM-DD`` text, ``month`` holds the lower-cased
    abbreviated month name, and ``year_month`` holds ``YYYY-Mon``.

    An empty DataFrame is returned as-is, without copying.
    """
    if df.empty:
        return df

    result = df.copy()

    for known_format in ('%d %b %Y', '%d/%m/%y'):
        try:
            parsed = pd.to_datetime(result['date'], format=known_format)
        except ValueError:
            continue
        break
    else:
        # Neither fixed format matched the whole column.
        parsed = pd.to_datetime(result['date'], format='mixed', dayfirst=True)

    result['date'] = parsed
    result['month'] = parsed.dt.strftime('%b').str.lower()
    result['year_month'] = parsed.dt.strftime('%Y-%b')
    result['date'] = parsed.dt.strftime('%Y-%m-%d')

    return result

def process_sheets_data(stock_inflow_df, release_df):
    """
    Clean both raw DataFrames and split out opening-stock records.

    Both frames are standardized, opening-stock rows are separated from the
    stock inflow data and dropped from the release data (matched on
    ``name_of_collector``), and dates are standardized on every result.

    Returns:
        A ``(stock_inflow_main_df, opening_stock_df, release_df)`` tuple.
    """
    inflow_clean = standardize_dataframe(stock_inflow_df)
    release_clean = standardize_dataframe(release_df)

    inflow_main, opening_stock = separate_opening_stock(inflow_clean)

    # Release rows attributed to "opening stock" are discarded, not kept aside.
    collector = release_clean['name_of_collector']
    is_opening_release = collector.str.contains('opening stock', case=False, na=False)
    release_kept = release_clean[~is_opening_release]

    inflow_main = standardize_dates(inflow_main)
    opening_stock = standardize_dates(opening_stock)
    release_kept = standardize_dates(release_kept)

    return inflow_main, opening_stock, release_kept

def _frame_to_sheet_values(df):
    """
    Convert a DataFrame into a header row plus data rows safe to send as JSON.

    Datetime columns are formatted as ``YYYY-MM-DD``, values in object columns
    are converted to text, and every missing value becomes an empty string.
    """
    columns = []
    for position in range(df.shape[1]):
        # Positional access keeps this working with duplicate column labels.
        series = df.iloc[:, position]

        if pd.api.types.is_datetime64_any_dtype(series.dtype):
            series = series.dt.strftime('%Y-%m-%d')

        stringify = series.dtype == 'object'
        cells = []
        for value, is_missing in zip(series.tolist(), series.isna().tolist()):
            if is_missing:
                # NaN serializes to invalid JSON and pd.NA does not serialize
                # at all, so missing cells are uploaded as blanks.
                cells.append('')
            elif stringify:
                cells.append(str(value))
            else:
                cells.append(value)
        columns.append(cells)

    values = [df.columns.values.tolist()]
    values.extend(list(row) for row in zip(*columns))
    return values


def upload_df_to_gsheet(df, spreadsheet_id, sheet_name, credentials_file):
    """
    Upload DataFrame to Google Sheets, replacing the sheet's current values.

    The range ``A1:ZZ`` of the target sheet is cleared, then the column names
    and rows of ``df`` are written starting at ``A1`` using the ``RAW`` value
    input option. Missing values are written as empty cells.

    Args:
        df: DataFrame to upload; it is not modified.
        spreadsheet_id: ID of the target spreadsheet.
        sheet_name: Name of the sheet (tab) to overwrite.
        credentials_file: Path to the service-account JSON key file.

    Returns:
        True when the upload succeeded, False when any error occurred (the
        error is printed rather than raised).
    """
    try:
        values = _frame_to_sheet_values(df)

        SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
        credentials = service_account.Credentials.from_service_account_file(
            credentials_file,
            scopes=SCOPES
        )

        service = build('sheets', 'v4', credentials=credentials)

        body = {
            'values': values
        }

        # Clear first so rows/columns beyond the new data do not linger.
        clear_request = service.spreadsheets().values().clear(
            spreadsheetId=spreadsheet_id,
            range=f'{sheet_name}!A1:ZZ'
        )
        clear_request.execute()

        result = service.spreadsheets().values().update(
            spreadsheetId=spreadsheet_id,
            range=f'{sheet_name}!A1',
            valueInputOption='RAW',
            body=body
        ).execute()

        print(f"Updated {result.get('updatedCells')} cells in {sheet_name}")
        return True

    except HttpError as error:
        print(f"An error occurred in {sheet_name}: {error}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred in {sheet_name}: {str(e)}")
        return False

def main():
    """
    Run the full pipeline: read, clean, and upload the stock sheets.

    Reads the ``stock_inflow`` and ``release`` worksheets, processes them, and
    uploads the results to the ``stock_inflow_clean``, ``opening_stock`` and
    ``release_clean`` sheets of the spreadsheet named by the
    ``SPREADSHEET_ID`` environment variable.

    Raises:
        ValueError: If ``SPREADSHEET_ID`` is unset or empty.
        Exception: If any upload fails; other errors from the pipeline are
            printed and re-raised unchanged.
    """
    CREDENTIALS_FILE = 'credentials.json'

    try:
        # Check the upload target before doing any reading or processing, so
        # a missing variable is reported clearly instead of as a generic
        # upload failure at the very end.
        spreadsheet_id = os.getenv('SPREADSHEET_ID')
        if not spreadsheet_id:
            raise ValueError(
                "The SPREADSHEET_ID environment variable is not set; "
                "cannot determine where to upload the results."
            )

        spreadsheet = connect_to_sheets(CREDENTIALS_FILE)

        stock_inflow_df = read_worksheet_to_df(spreadsheet, 'stock_inflow')
        release_df = read_worksheet_to_df(spreadsheet, 'release')

        stock_inflow_main_df, opening_stock_df, release_df = process_sheets_data(stock_inflow_df, release_df)

        outputs = (
            (stock_inflow_main_df, 'stock_inflow_clean'),
            (opening_stock_df, 'opening_stock'),
            (release_df, 'release_clean'),
        )

        # Build a list (not a lazy generator) so every upload is attempted
        # even after an earlier one has failed.
        results = [
            upload_df_to_gsheet(frame, spreadsheet_id, sheet_name, CREDENTIALS_FILE)
            for frame, sheet_name in outputs
        ]

        if not all(results):
            raise Exception("Failed to upload one or more datasets")

        print("Data processing and upload completed successfully!")

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        raise

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()