In [3]:
import pandas as pd
import json

# Function to safely parse the string representation of a list of dictionaries
def parse_management_chain(chain_str):
    """Extract the 'S' values from a serialized list of ``{'S': ...}`` dicts.

    The text may be strict JSON (double-quoted) or a Python-literal style
    string (single-quoted). Parsing is attempted in this order:

    1. strict JSON,
    2. Python literal via ``ast.literal_eval`` (handles single quotes and
       values that themselves contain apostrophes),
    3. the legacy fallback of swapping every ``'`` for ``"`` and parsing as
       JSON (keeps inputs that mix single quotes with JSON-only tokens such
       as ``true``/``null`` working).

    Parameters
    ----------
    chain_str : str
        Serialized management chain. Any non-string value (for example a
        missing cell) yields an empty list.

    Returns
    -------
    list
        The ``'S'`` value of every entry, in order, or ``[]`` when the input
        cannot be parsed or does not have the expected shape.
    """
    import ast  # local import: only this function needs it

    if not isinstance(chain_str, str):
        return []

    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", "\"")),
    )
    for parse in parsers:
        try:
            chain = parse(chain_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # This representation did not match; try the next one.
            continue
        try:
            return [item['S'] for item in chain]
        except (KeyError, TypeError, IndexError):
            # Parsed, but it is not a list of dicts that each carry an 'S' key.
            return []

    return []  # no parser accepted the input

def main(file_path='C:/Users/jiang/Desktop/events.csv'):
    """Load the events CSV and build per-user lookup tables.

    NOTE(review): a later cell in this notebook defines another ``main``;
    whichever definition ran last is the one bound to the name.

    Parameters
    ----------
    file_path : str or path-like, optional
        Location of the CSV file. It must provide the columns ``user_id``,
        ``manager_id``, ``management_chain`` and ``event_type``. Defaults to
        the path this notebook was originally written against.

    Returns
    -------
    tuple
        ``(user_management_dict, user_manager_dict, event_types)`` where

        * ``user_management_dict`` maps ``user_id`` to the parsed management
          chain (a list),
        * ``user_manager_dict`` maps ``user_id`` to ``manager_id``,
        * ``event_types`` is the array of distinct ``event_type`` values.

        Because dict keys are unique, a ``user_id`` that appears on several
        rows ends up with a single entry in each dictionary.
    """
    df = pd.read_csv(file_path)

    # Parse every serialized chain without adding a scratch column to the frame.
    parsed_chains = df['management_chain'].apply(parse_management_chain)

    # Key both lookups by user_id.
    user_management_dict = pd.Series(parsed_chains.values, index=df.user_id).to_dict()
    user_manager_dict = pd.Series(df.manager_id.values, index=df.user_id).to_dict()
    event_types = df['event_type'].unique()

    return user_management_dict, user_manager_dict, event_types

# Build the two per-user lookups and the distinct event types, then show each
# of them on its own line.
user_management_chain, user_manager_dict, event_types = main()
print(user_management_chain, user_manager_dict, event_types, sep="\n")


{'glenak': ['sahamm', 'dclowe', 'mayoun', 'mihirpat', 'cmoses', 'steve', 'ajassy']}
{'glenak': 'sahamm'}
['Barrister' 'Shoehorn' 'Isengard']


In [8]:
# Function to parse the "form_input" field and extract "reason", "tooling", and "useCase"
def parse_form_input(input_str):
    """Pull the 'reason', 'tooling' and 'useCase' strings out of a form payload.

    The payload is expected to be a JSON object whose fields are themselves
    objects carrying the value under an ``'S'`` key, e.g.
    ``{"reason": {"S": "Other"}}``.

    Parameters
    ----------
    input_str : str
        JSON text of the form input. Non-text values (such as the NaN pandas
        uses for an empty CSV cell) are treated as "no data".

    Returns
    -------
    tuple
        ``(reason, tooling, use_case)``. Each element falls back to ``''``
        when the field is absent, is not an object, or has no ``'S'`` key.
        ``('', '', '')`` is returned when the input is not text, is not valid
        JSON, or does not decode to a JSON object.
    """
    empty = ('', '', '')

    # json.loads raises TypeError on non-text input such as NaN or None.
    if not isinstance(input_str, (str, bytes, bytearray)):
        return empty

    try:
        input_dict = json.loads(input_str)
    except ValueError:  # JSONDecodeError and UnicodeDecodeError both derive from it
        return empty

    # Valid JSON that is not an object (list, number, null, ...) has no fields.
    if not isinstance(input_dict, dict):
        return empty

    def _field(name):
        # Guard against a field that is present but not an object (e.g. null).
        entry = input_dict.get(name)
        return entry.get('S', '') if isinstance(entry, dict) else ''

    return _field('reason'), _field('tooling'), _field('useCase')

def main(file_path='C:/Users/jiang/Desktop/logs.csv'):
    """Load the logs CSV and return ``user_id`` next to the parsed form fields.

    NOTE(review): this redefines the ``main`` from the earlier cell of this
    notebook; once this cell has run, ``main`` refers to this function.

    Parameters
    ----------
    file_path : str or path-like, optional
        Location of the CSV file. It must provide the columns ``user_id`` and
        ``form_input``. Defaults to the path this notebook was originally
        written against.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns ``user_id``, ``Reason``,
        ``Tooling`` and ``UseCase``.
    """
    df = pd.read_csv(file_path)

    # Each element of the resulting Series is a (reason, tooling, use_case) tuple.
    parsed_data = df['form_input'].apply(parse_form_input)

    # Reuse the source index so the column-wise concat below pairs every
    # parsed row with the user_id of the row it came from.
    parsed_df = pd.DataFrame(
        parsed_data.tolist(),
        columns=['Reason', 'Tooling', 'UseCase'],
        index=df.index,
    )

    return pd.concat([df['user_id'], parsed_df], axis=1)

# Parse the log file, then preview the leading rows of the combined table.
result = main()
print(result.head(n=5))

    user_id                        Reason Tooling UseCase
0  acarladd                         Other                
1  acarladd  Tooling down or did not work  Timber        
2  acarladd                         Other                
3   hdeluna                         Other                
