# Function 

## Date function 

### Object to Date Function

In [None]:
import pandas as pd
import numpy as np

def convert_to_datetime_or_date(df, columns, format_type='datetime'):
    """
    Convert specified columns in a DataFrame to timezone-naive datetimes.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the columns to be converted.
        It is modified in place and also returned.
    columns (str or list): Column name, or list of column names, to convert.
        Names that are not present in the DataFrame are skipped with a
        printed warning.
    format_type (str): 'datetime' keeps the time of day; 'date' resets the
        time of day to midnight via ``.dt.normalize()``. Default is 'datetime'.

    Returns:
    pd.DataFrame: The same DataFrame with the specified columns converted.

    Raises:
    ValueError: If format_type is neither 'datetime' nor 'date'.
    """
    # Validate once, up front, so a bad format_type is reported even when
    # none of the requested columns exist (or the column list is empty).
    if format_type not in ('datetime', 'date'):
        raise ValueError("Invalid format_type. Use 'datetime' or 'date'.")

    # Accept a bare column name; iterating a string would otherwise treat
    # every character as a separate column name.
    if isinstance(columns, str):
        columns = [columns]

    for column in columns:
        if column not in df.columns:
            print(f"Warning: '{column}' not found in DataFrame columns.")
            continue

        converted = pd.to_datetime(df[column])
        if format_type == 'date':
            converted = converted.dt.normalize()
        # Remove timezone if present to ensure consistency
        df[column] = converted.dt.tz_localize(None)
    return df

# # example of calling the function
# df = convert_to_datetime_or_date(df, ['SalesforceDateTime', 'AnotherDateTime'], format_type='date')



## Access data from Salesforce via SOQL

In [None]:
import requests
import pandas as pd
from simple_salesforce import Salesforce
import keyring

def export_salesforce_data(query, export_file=None, timeout=30):
    """
    Run a SOQL query against Salesforce and return the result as a DataFrame.

    Credentials are read from the system keyring (service "salesforce") and
    exchanged for an OAuth 2.0 access token using the password grant.

    Parameters:
    query (str): The SOQL query to execute.
    export_file (str, optional): If provided, the DataFrame is also written
        to this path as CSV (without the index).
    timeout (float): Seconds to wait for the token endpoint before giving up.
        Default is 30.

    Returns:
    pd.DataFrame: One row per record, with nested dictionaries flattened
        into dot-separated column names (e.g. 'Primary__r.Name').

    Raises:
    RuntimeError: If any required credential is missing from the keyring.
    requests.HTTPError: If the token request returns an error status.
    requests.Timeout: If the token endpoint does not answer within `timeout`.
    """
    # Retrieve the credentials; keyring returns None for entries that do not
    # exist, which would otherwise surface as an opaque TypeError below.
    credential_names = (
        "username",
        "password",
        "security_token",
        "consumer_key",
        "consumer_secret",
    )
    credentials = {
        name: keyring.get_password("salesforce", name) for name in credential_names
    }
    missing = [name for name, value in credentials.items() if value is None]
    if missing:
        raise RuntimeError(
            "Missing Salesforce credential(s) in keyring: " + ", ".join(missing)
        )

    # Step 1: Obtain OAuth 2.0 token
    token_url = "https://login.salesforce.com/services/oauth2/token"
    payload = {
        'grant_type': 'password',
        'client_id': credentials["consumer_key"],
        'client_secret': credentials["consumer_secret"],
        'username': credentials["username"],
        # The password grant expects the security token appended to the password
        'password': credentials["password"] + credentials["security_token"],
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server
    response = requests.post(token_url, data=payload, timeout=timeout)
    response.raise_for_status()  # Check if the request was successful

    # Parse the token response once and extract what is needed
    token_data = response.json()
    access_token = token_data.get('access_token')
    instance_url = token_data.get('instance_url')

    # Step 2: Authenticate to Salesforce using the access token
    sf = Salesforce(instance_url=instance_url, session_id=access_token)

    # Step 3: Query data. query_all follows the pagination itself and returns
    # the complete record list, so no manual query_more loop is needed.
    records = sf.query_all(query)['records']

    # Function to flatten nested dictionaries
    def flatten_record(record, parent_key='', sep='.'):
        flat = {}
        for key, value in record.items():
            new_key = f"{parent_key}{sep}{key}" if parent_key else key
            if isinstance(value, dict):
                flat.update(flatten_record(value, new_key, sep=sep))
            else:
                flat[new_key] = value
        return flat

    # Flatten all records and convert the data to a pandas DataFrame
    df = pd.DataFrame([flatten_record(record) for record in records])

    # Clean up the DataFrame (remove top-level Salesforce metadata); only
    # drop the columns that are actually present to avoid a KeyError.
    metadata_columns = [
        column for column in ('attributes.type', 'attributes.url')
        if column in df.columns
    ]
    if metadata_columns:
        df = df.drop(columns=metadata_columns)

    # Optionally, export the DataFrame to a CSV file if export_file is provided
    if export_file:
        df.to_csv(export_file, index=False)
        print(f"Data exported to {export_file}")

    # Return the DataFrame
    return df

# example of calling the function


# # Example usage
# query = """
# SELECT
#     id,
#     User__c, 
#     Name, 
#     Primary__r.Name,
#     Staff_Activated__c,
#     Primary__r.POD__r.Name,
#     Role_Title__r.Name  
# FROM
#     Staff__c
# """
# export_file = 'test.csv'

# # Get the DataFrame
# df = export_salesforce_data(query)

# # Display the DataFrame
# df


## Check for duplicates

In [None]:
import pandas as pd

def check_duplicates(df, columns):
    """
    Report whether a DataFrame contains rows that repeat on the given column(s).

    Every row that shares its key with at least one other row is printed
    (all occurrences, not just the repeats).

    Parameters:
    df (pd.DataFrame): The DataFrame to inspect.
    columns (str or list): A single column name, or a list of column names,
        forming the key on which rows are compared.

    Returns:
    bool: True if at least one duplicated row exists, otherwise False.
    """
    # Normalise a single column name into a one-element list
    subset = [columns] if isinstance(columns, str) else columns

    # keep=False flags every member of a duplicate group
    duplicate_mask = df.duplicated(subset=subset, keep=False)
    duplicate_rows = df[duplicate_mask]

    if duplicate_rows.empty:
        print("No duplicates found.")
        return False

    print("Duplicates found:")
    print(duplicate_rows)
    return True

# Example usage:
# Build a small sample DataFrame in which the second and third rows are identical
data = dict(
    A=[1, 2, 2, 4],
    B=[5, 6, 6, 8],
    C=['X', 'Y', 'Y', 'Z'],
)

df = pd.DataFrame.from_dict(data)

# Single key column, passed as a plain string
check_duplicates(df, columns='A')

# Composite key, passed as a list of column names
check_duplicates(df, columns=['A', 'B'])


## Exporting the data to MySQL 8.0

In [None]:
import pandas as pd
from sqlalchemy import create_engine
import urllib.parse
import win32cred
from sqlalchemy.orm import sessionmaker

def get_windows_credentials(target_name):
    """
    Look up a generic credential in Windows Credential Manager.

    Parameters:
    target_name (str): Name under which the credential was stored.

    Returns:
    tuple: (username, password), with the stored blob decoded as UTF-16.
    """
    credential = win32cred.CredRead(target_name, win32cred.CRED_TYPE_GENERIC, 0)
    return credential['UserName'], credential['CredentialBlob'].decode('utf-16')

def load_dataframe_to_mysql(df, table_name, host='localhost',
                            database='edwardmellorsalesforce',
                            target_name='SQLServerConnection',
                            if_exists='replace'):
    """
    Load a DataFrame into a MySQL table with the same structure as the DataFrame.

    Errors raised while writing are caught and printed rather than propagated,
    so the function returns None whether or not the load succeeded.

    Parameters:
    df (pd.DataFrame): The data to write.
    table_name (str): Destination table; it is lower-cased before use.
    host (str): MySQL server host. Default is 'localhost'.
    database (str): MySQL database (schema) name.
    target_name (str): Name of the Windows Credential Manager entry that
        holds the MySQL username and password.
    if_exists (str): Passed to ``DataFrame.to_sql``; the default 'replace'
        drops and recreates an existing table.
    """
    # Retrieve credentials
    username, password = get_windows_credentials(target_name)

    # URL-encode both parts of the credential: characters such as '@', ':'
    # or '/' in either one would otherwise corrupt the connection URL.
    encoded_username = urllib.parse.quote_plus(username)
    encoded_password = urllib.parse.quote_plus(password)

    # Create a connection string
    connection_string = (
        f'mysql+mysqlconnector://{encoded_username}:{encoded_password}@{host}/{database}'
    )

    # Create SQLAlchemy engine with increased timeout and autocommit
    engine = create_engine(
        connection_string,
        connect_args={"connect_timeout": 600, "autocommit": True},
    )

    # No ORM session is opened: to_sql runs directly on the engine, so a
    # separate session would have nothing to commit or roll back.
    try:
        # Convert table name to lowercase to avoid case sensitivity issues
        table_name = table_name.lower()

        # Debug: Print status before loading the DataFrame
        print(f"Loading DataFrame into MySQL table '{table_name}'...")

        # Load DataFrame into MySQL with chunking
        df.to_sql(name=table_name, con=engine, if_exists=if_exists,
                  index=False, chunksize=1000)

        # Debug: Confirm table creation
        print(f"DataFrame successfully exported to MySQL database into table '{table_name}'")

    except Exception as e:
        # Best-effort load: report the failure instead of raising
        print(f"Error: {e}")
    finally:
        # Ensure the connection is properly closed
        engine.dispose()
        print("Database connection closed.")

# Example of calling load_dataframe_to_mysql

# if __name__ == "__main__":
#     # Load DataFrame into MySQL
#     load_dataframe_to_mysql(Master, 'Master')
