In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Repository catalogue; later cells split it by its 'Type' column ('Application' / 'Tool').
# This file is semicolon-delimited, unlike the two exports below which use the default comma.
df = pd.read_csv('zkp_repos.csv', delimiter=';')

# Commit records and contributor records; later cells key both by repository name and owner.
commit_df, contributors_df = (
    pd.read_csv(csv_path) for csv_path in ('tool_commits.csv', 'contributor_data.csv')
)

#### Preprocess Data

In [None]:
# Shorten the verbose resources header so later cells can refer to it as 'Tool Resources'.
df = df.rename(columns={"Tool Resources (Twitter, Discord, Website etc.)": "Tool Resources"})

# Take an explicit copy of the filtered rows: assigning a column to a boolean-filtered
# selection of `df` without `.copy()` triggers pandas' SettingWithCopyWarning.
df_applications = df.loc[df['Type'] == 'Application'].copy()

# 'Tool' holds a ', '-separated string; turn it into a list of tool names per row.
# Missing values are treated as "no tools" (empty list) instead of staying NaN through
# `.str.split` and then raising TypeError inside the list comprehension.
df_applications['Tool'] = (
    df_applications['Tool']
    .fillna('')
    .str.split(', ')
    .apply(lambda tools: [tool for tool in tools if tool != ''])
)


#### Relationship Between Tool Use and External Resources Available for Tool

In [None]:
# `df_applications` (rows with Type == 'Application', 'Tool' already split into a list)
# is produced by the preprocessing cell; it used to be re-derived here with identical,
# copy-pasted code that also assigned columns on a filtered slice of `df`.
# One row per (application, tool) pair; rows with an empty tool list become NaN.
expanded_df = df_applications.explode('Tool')

# Count occurrences of each tool across applications (NaN is ignored by value_counts,
# result is sorted by descending frequency). Names are lower-cased so they line up with
# the lower-cased tool names used as the merge key below.
tool_counts = (
    expanded_df['Tool']
    .str.lower()
    .value_counts()
    .rename_axis('Tool')
    .reset_index(name='Frequency')
)

# One 'Tool Resources' value per tool: the maximum over rows sharing the same
# lower-cased name. Built as a chain so no column is assigned on a slice of `df`.
tool_resources = (
    df.loc[df['Type'] == 'Tool']
    .assign(Name=lambda tools: tools['Name'].str.lower())
    .groupby('Name')['Tool Resources']
    .max()
    .reset_index()
    .rename(columns={'Name': 'Tool'})
)

# Left merge keeps every counted tool (and the frequency ordering); tools without a
# matching 'Tool'-type row get NaN for 'Tool Resources'.
tool_data = pd.merge(tool_counts, tool_resources, on='Tool', how='left')


fig = px.bar(tool_data, x='Tool', y='Frequency',
             title='Relationship Between Tool Frequency and External Resources',
             labels={'Frequency': 'Tool Frequency', 'Tool Resources': 'Tool Resources Available'},
             template="plotly_dark",
             color='Tool Resources',
             text='Frequency',
             # Pin the x-axis to the frequency-sorted order of tool_data.
             category_orders={"Tool": tool_data["Tool"].tolist()}
             )

fig.update_layout(legend_title_text='Tool Resources')
fig.show()


#### All Commits Over Time for Various Repositories

In [None]:
# sort_values returns a new frame, so adding the display label here leaves `commit_df`
# itself untouched.
# NOTE(review): CommitterDate is read from CSV without parse_dates, so this presumably
# sorts strings — confirm the stored format sorts chronologically.
all_commits_df = (
    commit_df
    .sort_values(by='CommitterDate')
    .assign(UniqueID=lambda commits: commits['Name'] + ' (' + commits['Owner'] + ')')
)

# `labels` must be keyed by the plotted column names. The previous keys ('Date', 'Name')
# matched no plotted column, so the mapping had no effect.
fig = px.scatter(all_commits_df, x='CommitterDate', y='UniqueID', color='UniqueID',
                 title='All Commits Over Time for Various Repositories',
                 labels={'CommitterDate': 'Commit Date', 'UniqueID': 'Repository'},
                 template="plotly_dark")

# Explicit titles are kept so the rendered axes and legend read exactly as before.
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Repository')
fig.update_layout(legend_title_text='Repository')
fig.show()


#### Committer Count per Tool Repository

In [None]:
# Derive a new frame instead of aliasing `commit_df`: the plain assignment
# `committers_df = commit_df` made both names point at the same object, so adding
# 'UniqueID' and lower-casing 'Committer' silently rewrote the loaded commit data.
committers_df = commit_df.assign(
    # Repository key in 'Name/Owner' form; the contributor cell builds its key the same way.
    UniqueID=lambda commits: commits['Name'] + '/' + commits['Owner'],
    # Case-insensitive committer identity, so differently-cased spellings count once.
    Committer=lambda commits: commits['Committer'].str.lower(),
)

# Distinct committers per repository, largest first (chained instead of inplace=True).
author_counts = (
    committers_df
    .groupby('UniqueID')['Committer']
    .nunique()
    .reset_index()
    .rename(columns={'Committer': 'CommitterCount'})
    .sort_values(by='CommitterCount', ascending=False)
)

fig = px.bar(author_counts, x='UniqueID', y='CommitterCount',
             title='Committer Count per Repository',
             template="plotly_dark",
             text='CommitterCount',
             labels={'CommitterCount': 'No. of Committers', 'UniqueID': 'Repository'})
fig.show()


#### Contributor Count per Tool Repository

In [None]:
# Build the repository key, normalise contributor names, then keep only repositories
# that also appear in `committers_df` (defined in the committer-count cell).
# Doing this as one chain yields a fresh frame, avoiding the column assignment on a
# boolean-filtered slice that triggers pandas' SettingWithCopyWarning.
contributors_df = (
    contributors_df
    .assign(
        # Same 'Name/Owner' key format as committers_df['UniqueID'], so isin() can match.
        UniqueID=lambda contribs: contribs['RepositoryName'] + '/' + contribs['Owner'],
        # Case-insensitive contributor identity.
        Contributor=lambda contribs: contribs['Contributor'].str.lower(),
    )
    .loc[lambda contribs: contribs['UniqueID'].isin(committers_df['UniqueID'])]
)

# Distinct contributors per repository, largest first (chained instead of inplace=True).
contributor_counts = (
    contributors_df
    .groupby('UniqueID')['Contributor']
    .nunique()
    .reset_index()
    .rename(columns={'Contributor': 'ContributorCount'})
    .sort_values(by='ContributorCount', ascending=False)
)


fig = px.bar(contributor_counts, x='UniqueID', y='ContributorCount',
             title='Contributor Count per Repository',
             template="plotly_dark",
             text='ContributorCount',
             labels={'ContributorCount': 'No. of Contributors', 'UniqueID': 'Repository'}
             )
fig.show()
