In [10]:
import pandas as pd
import json
from collections import Counter

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()



In [11]:
# Print 1st level actions
print("Top 5 First-Level Actions:\n", first_level_actions)

Top 5 First-Level Actions:
 {'a12': 7, 'a13': 5, 'a14': 4, 'a11': 2, 'a15': 1}


In [13]:
# 2. Top 2 2nd level actions for each 1st level action
second_level_actions = {}
for index, row in df.iterrows():
    if row['level'] == 1 and row['action_level'] == 1:
        first_action = row['action']
        if index + 1 < len(df):
            next_row = df.iloc[index + 1]
            if next_row['level'] == 0 and next_row['action_level'] == 2:
                if first_action not in second_level_actions:
                    second_level_actions[first_action] = Counter()
                second_level_actions[first_action][next_row['action']] += 1

for action in second_level_actions:
    second_level_actions[action] = dict(second_level_actions[action].most_common(5))



In [14]:
# Print 1st level actions
print("Top 3 Second-Level Actions:\n", second_level_actions)

Top 3 Second-Level Actions:
 {'a14': {'a11': 2, 'a13': 2}, 'a11': {'a13': 1, 'a14': 1}, 'a13': {'a14': 2, 'a12': 1, 'a15': 1, 'a11': 1}, 'a12': {'a15': 3, 'a13': 2, 'a14': 1, 'a11': 1}, 'a15': {'a11': 1}}


In [6]:
# 3. Top 2 3rd level actions for each 1st/2nd level action combination
third_level_actions = {}
for index, row in df.iterrows():
    if row['level'] == 1 and row['action_level'] == 1:
        first_action = row['action']
        if index + 1 < len(df) and index + 2 < len(df):
            second_row = df.iloc[index + 1]
            third_row = df.iloc[index + 2]
            if (second_row['level'] == 0 and second_row['action_level'] == 2 and
                third_row['level'] == 0 and third_row['action_level'] == 3):
                key = (first_action, second_row['action'])
                if key not in third_level_actions:
                    third_level_actions[key] = Counter()
                third_level_actions[key][third_row['action']] += 1

for key in third_level_actions:
    third_level_actions[key] = dict(third_level_actions[key].most_common(5))



In [7]:
# Print 1st level actions
print("Top 3 Third-Level Actions:\n", third_level_actions)

Top 3 Third-Level Actions:
 {('a14', 'a11'): {'a12': 2}, ('a14', 'a13'): {'a15': 2}, ('a11', 'a13'): {'a15': 1}, ('a13', 'a12'): {'a11': 1}, ('a12', 'a13'): {'a11': 2}, ('a12', 'a15'): {'a14': 2, 'a11': 1}, ('a11', 'a14'): {'a13': 1}, ('a13', 'a14'): {'a11': 2}, ('a12', 'a14'): {'a15': 1}, ('a13', 'a15'): {'a14': 1}, ('a13', 'a11'): {'a12': 1}, ('a15', 'a11'): {'a14': 1}, ('a12', 'a11'): {'a14': 1}}


In [8]:
# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_2_second_level_actions": second_level_actions,
    "top_2_third_level_actions": {f"{k[0]}-{k[1]}": v for k, v in third_level_actions.items()}
}

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print or save the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_2_second_level_actions": {
    "a14": {
      "a11": 2,
      "a13": 2
    },
    "a11": {
      "a13": 1,
      "a14": 1
    },
    "a13": {
      "a14": 2,
      "a12": 1
    },
    "a12": {
      "a15": 3,
      "a13": 2
    },
    "a15": {
      "a11": 1
    }
  },
  "top_2_third_level_actions": {
    "a14-a11": {
      "a12": 2
    },
    "a14-a13": {
      "a15": 2
    },
    "a11-a13": {
      "a15": 1
    },
    "a13-a12": {
      "a11": 1
    },
    "a12-a13": {
      "a11": 2
    },
    "a12-a15": {
      "a14": 2,
      "a11": 1
    },
    "a11-a14": {
      "a13": 1
    },
    "a13-a14": {
      "a11": 2
    },
    "a12-a14": {
      "a15": 1
    },
    "a13-a15": {
      "a14": 1
    },
    "a13-a11": {
      "a12": 1
    },
    "a15-a11": {
      "a14": 1
    },
    "a12-a11": {
      "a14": 1
    }
  }
}


In [9]:
import pandas as pd
import json
from collections import Counter

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()

# Print 1st level actions
print("Top 5 1st-Level Actions:\n", first_level_actions)

# 2. Top 5 2nd level actions
second_level_actions = Counter()
for i in range(1, len(df)):
    current_row = df.iloc[i]
    previous_row = df.iloc[i-1]
    if (current_row['level'] == 0 and current_row['action_level'] == 2 and
        previous_row['level'] == 1 and previous_row['action_level'] == 1):
        second_level_actions[current_row['action']] += 1

second_level_actions = dict(second_level_actions.most_common(5))

# Print 2nd level actions
print("Top 5 2nd-Level Actions:\n", second_level_actions)

# 3. Top 5 3rd level actions
third_level_actions = Counter()
for i in range(1, len(df)):
    current_row = df.iloc[i]
    previous_row = df.iloc[i-1]
    if (current_row['level'] == 0 and current_row['action_level'] == 3 and
        previous_row['level'] == 0 and previous_row['action_level'] == 2):
        third_level_actions[current_row['action']] += 1

third_level_actions = dict(third_level_actions.most_common(5))

# Print 3rd level actions
print("Top 5 3rd-Level Actions:\n", third_level_actions)

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions": second_level_actions,
    "top_5_third_level_actions": third_level_actions
}

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

Top 5 1st-Level Actions:
 {'a12': 7, 'a13': 5, 'a14': 4, 'a11': 2, 'a15': 1}
Top 5 2nd-Level Actions:
 {'a11': 5, 'a13': 5, 'a15': 4, 'a14': 4, 'a12': 1}
Top 5 3rd-Level Actions:
 {'a14': 6, 'a11': 6, 'a15': 4, 'a12': 3, 'a13': 1}
{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions": {
    "a11": 5,
    "a13": 5,
    "a15": 4,
    "a14": 4,
    "a12": 1
  },
  "top_5_third_level_actions": {
    "a14": 6,
    "a11": 6,
    "a15": 4,
    "a12": 3,
    "a13": 1
  }
}


In [15]:
# This is the good working version from Claude
import pandas as pd
import json
from collections import Counter, defaultdict

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()

# 2. Top 5 2nd level actions for each 1st level action
second_level_actions = defaultdict(Counter)
for i in range(len(df) - 1):
    current_row = df.iloc[i]
    next_row = df.iloc[i + 1]
    if (current_row['level'] == 1 and current_row['action_level'] == 1 and
        next_row['level'] == 0 and next_row['action_level'] == 2):
        second_level_actions[current_row['action']][next_row['action']] += 1

second_level_top5 = {action: dict(counter.most_common(5)) for action, counter in second_level_actions.items()}

# 3. Top 5 3rd level actions for each 1st/2nd level action combination
third_level_actions = defaultdict(Counter)
for i in range(len(df) - 2):
    first_row = df.iloc[i]
    second_row = df.iloc[i + 1]
    third_row = df.iloc[i + 2]
    if (first_row['level'] == 1 and first_row['action_level'] == 1 and
        second_row['level'] == 0 and second_row['action_level'] == 2 and
        third_row['level'] == 0 and third_row['action_level'] == 3):
        key = (first_row['action'], second_row['action'])
        third_level_actions[key][third_row['action']] += 1

third_level_top5 = {f"{k[0]}-{k[1]}": dict(v.most_common(5)) for k, v in third_level_actions.items()}

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions_per_first": second_level_top5,
    "top_5_third_level_actions_per_combo": third_level_top5
}

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions_per_first": {
    "a14": {
      "a11": 2,
      "a13": 2
    },
    "a11": {
      "a13": 1,
      "a14": 1
    },
    "a13": {
      "a14": 2,
      "a12": 1,
      "a15": 1,
      "a11": 1
    },
    "a12": {
      "a15": 3,
      "a13": 2,
      "a14": 1,
      "a11": 1
    },
    "a15": {
      "a11": 1
    }
  },
  "top_5_third_level_actions_per_combo": {
    "a14-a11": {
      "a12": 2
    },
    "a14-a13": {
      "a15": 2
    },
    "a11-a13": {
      "a15": 1
    },
    "a13-a12": {
      "a11": 1
    },
    "a12-a13": {
      "a11": 2
    },
    "a12-a15": {
      "a14": 2,
      "a11": 1
    },
    "a11-a14": {
      "a13": 1
    },
    "a13-a14": {
      "a11": 2
    },
    "a12-a14": {
      "a15": 1
    },
    "a13-a15": {
      "a14": 1
    },
    "a13-a11": {
      "a12": 1
    },
    "a15-a11": {
      "a14": 1
    },
    "a12-a11"

In [18]:
import pandas as pd
import json
from collections import defaultdict

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()

# Helper function to get top 5 actions
def get_top_5(group):
    return group.value_counts().nlargest(5).to_dict()

# 2. Top 5 2nd level actions for each 1st level action
df['prev_action'] = df['action'].shift()
df['prev_level'] = df['level'].shift()
df['prev_action_level'] = df['action_level'].shift()

second_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 2) &
    (df['prev_level'] == 1) &
    (df['prev_action_level'] == 1)
)

second_level_actions = df[second_level_mask].groupby('prev_action')['action'].apply(get_top_5).to_dict()

# 3. Top 5 3rd level actions for each 1st/2nd level action combination
df['prev_prev_action'] = df['action'].shift(2)
df['prev_prev_level'] = df['level'].shift(2)
df['prev_prev_action_level'] = df['action_level'].shift(2)

third_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 3) &
    (df['prev_level'] == 0) &
    (df['prev_action_level'] == 2) &
    (df['prev_prev_level'] == 1) &
    (df['prev_prev_action_level'] == 1)
)

third_level_actions = df[third_level_mask].groupby(['prev_prev_action', 'prev_action'])['action'].apply(get_top_5)
third_level_actions = {f"{k[0]}-{k[1]}": v for k, v in third_level_actions.to_dict().items()}

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions_per_first": second_level_actions,
    "top_5_third_level_actions_per_combo": third_level_actions
}

# Convert any non-string keys to strings
def stringify_keys(d):
    return {str(key): value if not isinstance(value, dict) else stringify_keys(value)
            for key, value in d.items()}

result = stringify_keys(result)

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions_per_first": {
    "('a11', 'a13')": 1.0,
    "('a11', 'a14')": 1.0,
    "('a11', 'a15')": NaN,
    "('a11', 'a11')": NaN,
    "('a11', 'a12')": NaN,
    "('a12', 'a13')": 2.0,
    "('a12', 'a14')": 1.0,
    "('a12', 'a15')": 3.0,
    "('a12', 'a11')": 1.0,
    "('a12', 'a12')": NaN,
    "('a13', 'a13')": NaN,
    "('a13', 'a14')": 2.0,
    "('a13', 'a15')": 1.0,
    "('a13', 'a11')": 1.0,
    "('a13', 'a12')": 1.0,
    "('a14', 'a13')": 2.0,
    "('a14', 'a14')": NaN,
    "('a14', 'a15')": NaN,
    "('a14', 'a11')": 2.0,
    "('a14', 'a12')": NaN,
    "('a15', 'a13')": NaN,
    "('a15', 'a14')": NaN,
    "('a15', 'a15')": NaN,
    "('a15', 'a11')": 1.0,
    "('a15', 'a12')": NaN
  },
  "top_5_third_level_actions_per_combo": {
    "a11-a13": NaN,
    "a11-a14": NaN,
    "a12-a11": NaN,
    "a12-a13": NaN,
    "a12-a14": NaN,
    "a12-a15": NaN,
    "

In [20]:
# Deleted NAGH
import pandas as pd
import json
import numpy as np

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# Helper function to get top 5 actions, excluding NaN values
def get_top_5(group):
    return group.value_counts().nlargest(5).to_dict()

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()

# 2. Top 5 2nd level actions for each 1st level action
df['prev_action'] = df['action'].shift()
df['prev_level'] = df['level'].shift()
df['prev_action_level'] = df['action_level'].shift()

second_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 2) &
    (df['prev_level'] == 1) &
    (df['prev_action_level'] == 1)
)

second_level_actions = df[second_level_mask].groupby('prev_action')['action'].apply(get_top_5).to_dict()

# 3. Top 5 3rd level actions for each 1st/2nd level action combination
df['prev_prev_action'] = df['action'].shift(2)
df['prev_prev_level'] = df['level'].shift(2)
df['prev_prev_action_level'] = df['action_level'].shift(2)

third_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 3) &
    (df['prev_level'] == 0) &
    (df['prev_action_level'] == 2) &
    (df['prev_prev_level'] == 1) &
    (df['prev_prev_action_level'] == 1)
)

third_level_actions = df[third_level_mask].groupby(['prev_prev_action', 'prev_action'])['action'].apply(get_top_5)
third_level_actions = {f"{k[0]}-{k[1]}": v for k, v in third_level_actions.to_dict().items()}

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions_per_first": second_level_actions,
    "top_5_third_level_actions_per_combo": third_level_actions
}

# Function to remove NaN values, convert to int, and stringify keys
def clean_dict(d):
    if isinstance(d, dict):
        return {str(k): clean_dict(v) for k, v in d.items() if not (isinstance(v, float) and np.isnan(v))}
    elif isinstance(d, float) and not np.isnan(d):
        return int(d)
    else:
        return d

# Clean the result dictionary
result = clean_dict(result)

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions_per_first": {
    "('a11', 'a13')": 1,
    "('a11', 'a14')": 1,
    "('a12', 'a13')": 2,
    "('a12', 'a14')": 1,
    "('a12', 'a15')": 3,
    "('a12', 'a11')": 1,
    "('a13', 'a14')": 2,
    "('a13', 'a15')": 1,
    "('a13', 'a11')": 1,
    "('a13', 'a12')": 1,
    "('a14', 'a13')": 2,
    "('a14', 'a11')": 2,
    "('a15', 'a11')": 1
  },
  "top_5_third_level_actions_per_combo": {
    "a13-a11": 1,
    "a14-a11": 2
  }
}


In [23]:
# Unsorted one
import pandas as pd
import json
import numpy as np

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# Helper function to get top 5 actions, excluding NaN values
def get_top_5(group):
    counts = group.value_counts().nlargest(5)
    return {str(index): int(value) for index, value in counts.items()}

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()

# 2. Top 5 2nd level actions for each 1st level action
df['prev_action'] = df['action'].shift()
df['prev_level'] = df['level'].shift()
df['prev_action_level'] = df['action_level'].shift()

second_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 2) &
    (df['prev_level'] == 1) &
    (df['prev_action_level'] == 1)
)

second_level_df = df[second_level_mask].groupby(['prev_action', 'action']).size().reset_index(name='count')
second_level_actions = {}
for first_level in second_level_df['prev_action'].unique():
    top_5 = second_level_df[second_level_df['prev_action'] == first_level].nlargest(5, 'count')
    for _, row in top_5.iterrows():
        key = f"{first_level}-{row['action']}"
        second_level_actions[key] = int(row['count'])

# 3. Top 5 3rd level actions for each 1st/2nd level action combination
df['prev_prev_action'] = df['action'].shift(2)
df['prev_prev_level'] = df['level'].shift(2)
df['prev_prev_action_level'] = df['action_level'].shift(2)

third_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 3) &
    (df['prev_level'] == 0) &
    (df['prev_action_level'] == 2) &
    (df['prev_prev_level'] == 1) &
    (df['prev_prev_action_level'] == 1)
)

third_level_df = df[third_level_mask].groupby(['prev_prev_action', 'prev_action', 'action']).size().reset_index(name='count')
third_level_actions = {}
for first_level in third_level_df['prev_prev_action'].unique():
    for second_level in third_level_df[third_level_df['prev_prev_action'] == first_level]['prev_action'].unique():
        top_5 = third_level_df[(third_level_df['prev_prev_action'] == first_level) &
                               (third_level_df['prev_action'] == second_level)].nlargest(5, 'count')
        for _, row in top_5.iterrows():
            key = f"{first_level}-{second_level}-{row['action']}"
            third_level_actions[key] = int(row['count'])

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions_per_first": second_level_actions,
    "top_5_third_level_actions_per_combo": third_level_actions
}

# Function to remove NaN values and convert to int
def clean_dict(d):
    if isinstance(d, dict):
        return {str(k): clean_dict(v) for k, v in d.items() if not (isinstance(v, float) and np.isnan(v))}
    elif isinstance(d, float) and not np.isnan(d):
        return int(d)
    else:
        return d

# Clean the result dictionary
result = clean_dict(result)

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions_per_first": {
    "a11-a13": 1,
    "a11-a14": 1,
    "a12-a15": 3,
    "a12-a13": 2,
    "a12-a11": 1,
    "a12-a14": 1,
    "a13-a14": 2,
    "a13-a11": 1,
    "a13-a12": 1,
    "a13-a15": 1,
    "a14-a11": 2,
    "a14-a13": 2,
    "a15-a11": 1
  },
  "top_5_third_level_actions_per_combo": {
    "a11-a13-a15": 1,
    "a11-a14-a13": 1,
    "a12-a11-a14": 1,
    "a12-a13-a11": 2,
    "a12-a14-a15": 1,
    "a12-a15-a14": 2,
    "a12-a15-a11": 1,
    "a13-a11-a12": 1,
    "a13-a12-a11": 1,
    "a13-a14-a11": 2,
    "a13-a15-a14": 1,
    "a14-a11-a12": 2,
    "a14-a13-a15": 2,
    "a15-a11-a14": 1
  }
}


In [24]:
# Final one sent to Alok with sorting enabled
import pandas as pd
import json
import numpy as np
from collections import OrderedDict

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()
first_level_actions = OrderedDict(sorted(first_level_actions.items(), key=lambda x: x[1], reverse=True))

# 2. Top 5 2nd level actions for each 1st level action
df['prev_action'] = df['action'].shift()
df['prev_level'] = df['level'].shift()
df['prev_action_level'] = df['action_level'].shift()

second_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 2) &
    (df['prev_level'] == 1) &
    (df['prev_action_level'] == 1)
)

second_level_df = df[second_level_mask].groupby(['prev_action', 'action']).size().reset_index(name='count')
second_level_actions = {}
for first_level in second_level_df['prev_action'].unique():
    top_5 = second_level_df[second_level_df['prev_action'] == first_level].nlargest(5, 'count')
    for _, row in top_5.iterrows():
        key = f"{first_level}-{row['action']}"
        second_level_actions[key] = int(row['count'])
second_level_actions = OrderedDict(sorted(second_level_actions.items(), key=lambda x: x[1], reverse=True))

# 3. Top 5 3rd level actions for each 1st/2nd level action combination
df['prev_prev_action'] = df['action'].shift(2)
df['prev_prev_level'] = df['level'].shift(2)
df['prev_prev_action_level'] = df['action_level'].shift(2)

third_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 3) &
    (df['prev_level'] == 0) &
    (df['prev_action_level'] == 2) &
    (df['prev_prev_level'] == 1) &
    (df['prev_prev_action_level'] == 1)
)

third_level_df = df[third_level_mask].groupby(['prev_prev_action', 'prev_action', 'action']).size().reset_index(name='count')
third_level_actions = {}
for first_level in third_level_df['prev_prev_action'].unique():
    for second_level in third_level_df[third_level_df['prev_prev_action'] == first_level]['prev_action'].unique():
        top_5 = third_level_df[(third_level_df['prev_prev_action'] == first_level) &
                               (third_level_df['prev_action'] == second_level)].nlargest(5, 'count')
        for _, row in top_5.iterrows():
            key = f"{first_level}-{second_level}-{row['action']}"
            third_level_actions[key] = int(row['count'])
third_level_actions = OrderedDict(sorted(third_level_actions.items(), key=lambda x: x[1], reverse=True))

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions_per_first": second_level_actions,
    "top_5_third_level_actions_per_combo": third_level_actions
}

# Function to remove NaN values and convert to int
def clean_dict(d):
    if isinstance(d, dict):
        return OrderedDict((str(k), clean_dict(v)) for k, v in d.items() if not (isinstance(v, float) and np.isnan(v)))
    elif isinstance(d, float) and not np.isnan(d):
        return int(d)
    else:
        return d

# Clean the result dictionary
result = clean_dict(result)

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions_per_first": {
    "a12-a15": 3,
    "a12-a13": 2,
    "a13-a14": 2,
    "a14-a11": 2,
    "a14-a13": 2,
    "a11-a13": 1,
    "a11-a14": 1,
    "a12-a11": 1,
    "a12-a14": 1,
    "a13-a11": 1,
    "a13-a12": 1,
    "a13-a15": 1,
    "a15-a11": 1
  },
  "top_5_third_level_actions_per_combo": {
    "a12-a13-a11": 2,
    "a12-a15-a14": 2,
    "a13-a14-a11": 2,
    "a14-a11-a12": 2,
    "a14-a13-a15": 2,
    "a11-a13-a15": 1,
    "a11-a14-a13": 1,
    "a12-a11-a14": 1,
    "a12-a14-a15": 1,
    "a12-a15-a11": 1,
    "a13-a11-a12": 1,
    "a13-a12-a11": 1,
    "a13-a15-a14": 1,
    "a15-a11-a14": 1
  }
}


In [25]:
import pandas as pd
import json
import numpy as np
from collections import OrderedDict

# Read the CSV file
df = pd.read_csv('fcsample - Action_Dataset.csv')

# 1. Top 5 1st level actions
first_level_actions = df[(df['level'] == 1) & (df['action_level'] == 1)]['action'].value_counts().nlargest(5).to_dict()
first_level_actions = OrderedDict(sorted(first_level_actions.items(), key=lambda x: x[1], reverse=True))

# 2. Top 5 2nd level actions for each 1st level action
df['prev_action'] = df['action'].shift()
df['prev_level'] = df['level'].shift()
df['prev_action_level'] = df['action_level'].shift()

second_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 2) &
    (df['prev_level'] == 1) &
    (df['prev_action_level'] == 1)
)

second_level_df = df[second_level_mask].groupby(['prev_action', 'action']).size().reset_index(name='count')
second_level_actions = {}
for first_level in second_level_df['prev_action'].unique():
    top_5 = second_level_df[second_level_df['prev_action'] == first_level].nlargest(5, 'count')
    for _, row in top_5.iterrows():
        key = f"{first_level}-{row['action']}"
        second_level_actions[key] = {
            'count': int(row['count']),
            'level1_count': first_level_actions.get(first_level, 0)
        }
second_level_actions = OrderedDict(sorted(second_level_actions.items(), key=lambda x: x[1]['count'], reverse=True))

# 3. Top 5 3rd level actions for each 1st/2nd level action combination
df['prev_prev_action'] = df['action'].shift(2)
df['prev_prev_level'] = df['level'].shift(2)
df['prev_prev_action_level'] = df['action_level'].shift(2)

third_level_mask = (
    (df['level'] == 0) &
    (df['action_level'] == 3) &
    (df['prev_level'] == 0) &
    (df['prev_action_level'] == 2) &
    (df['prev_prev_level'] == 1) &
    (df['prev_prev_action_level'] == 1)
)

third_level_df = df[third_level_mask].groupby(['prev_prev_action', 'prev_action', 'action']).size().reset_index(name='count')
third_level_actions = {}
for first_level in third_level_df['prev_prev_action'].unique():
    for second_level in third_level_df[third_level_df['prev_prev_action'] == first_level]['prev_action'].unique():
        top_5 = third_level_df[(third_level_df['prev_prev_action'] == first_level) &
                               (third_level_df['prev_action'] == second_level)].nlargest(5, 'count')
        for _, row in top_5.iterrows():
            key = f"{first_level}-{second_level}-{row['action']}"
            third_level_actions[key] = {
                'count': int(row['count']),
                'level1_count': first_level_actions.get(first_level, 0),
                'level2_count': second_level_actions.get(f"{first_level}-{second_level}", {}).get('count', 0)
            }
third_level_actions = OrderedDict(sorted(third_level_actions.items(), key=lambda x: x[1]['count'], reverse=True))

# Prepare the JSON output
result = {
    "top_5_first_level_actions": first_level_actions,
    "top_5_second_level_actions_per_first": second_level_actions,
    "top_5_third_level_actions_per_combo": third_level_actions
}

# Function to remove NaN values and convert to int
def clean_dict(d):
    if isinstance(d, dict):
        return OrderedDict((str(k), clean_dict(v)) for k, v in d.items() if not (isinstance(v, float) and np.isnan(v)))
    elif isinstance(d, float) and not np.isnan(d):
        return int(d)
    else:
        return d

# Clean the result dictionary
result = clean_dict(result)

# Convert to JSON
json_output = json.dumps(result, indent=2)

# Print the JSON output
print(json_output)

# Optionally, save to a file
# with open('action_analysis.json', 'w') as f:
#     json.dump(result, f, indent=2)

{
  "top_5_first_level_actions": {
    "a12": 7,
    "a13": 5,
    "a14": 4,
    "a11": 2,
    "a15": 1
  },
  "top_5_second_level_actions_per_first": {
    "a12-a15": {
      "count": 3,
      "level1_count": 7
    },
    "a12-a13": {
      "count": 2,
      "level1_count": 7
    },
    "a13-a14": {
      "count": 2,
      "level1_count": 5
    },
    "a14-a11": {
      "count": 2,
      "level1_count": 4
    },
    "a14-a13": {
      "count": 2,
      "level1_count": 4
    },
    "a11-a13": {
      "count": 1,
      "level1_count": 2
    },
    "a11-a14": {
      "count": 1,
      "level1_count": 2
    },
    "a12-a11": {
      "count": 1,
      "level1_count": 7
    },
    "a12-a14": {
      "count": 1,
      "level1_count": 7
    },
    "a13-a11": {
      "count": 1,
      "level1_count": 5
    },
    "a13-a12": {
      "count": 1,
      "level1_count": 5
    },
    "a13-a15": {
      "count": 1,
      "level1_count": 5
    },
    "a15-a11": {
      "count": 1,
      "level1_count"