### Understanding what drives tool usage

In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
from datetime import datetime, timezone

In [None]:
# Load every CSV the analysis cells below rely on. The paths are relative,
# so the files are looked up in the current working directory.
(
    zkp_repos,
    tool_commits,
    tool_issues,
    repo_contributors,
    contributor_data,
    branches_data,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'zkp_repos.csv',
        'tool_commits.csv',
        'tool_issues.csv',
        'repo_contributors.csv',
        'contributor_data.csv',
        'branches_data.csv',
    )
)

##### Get AppCount

In [None]:

# Turn the ', '-separated 'Tool' string of every Application row into a list
# of tool names, then explode so that each listed tool gets a row of its own.
# Rows whose 'Type' is not 'Application' are left untouched.
def _split_tools(raw):
    """Return the non-empty tool names contained in one 'Tool' cell.

    Accepts the raw ', '-separated string, an already-split list (so the cell
    can be re-run safely), or a missing value, which yields an empty list.
    """
    if isinstance(raw, str):
        raw = raw.split(', ')
    elif not isinstance(raw, list):
        # NaN / None: the application lists no tools.
        return []
    return [item for item in raw if item != '']


is_application = zkp_repos['Type'] == 'Application'
zkp_repos.loc[is_application, 'Tool'] = zkp_repos.loc[is_application, 'Tool'].apply(_split_tools)

# An empty list explodes to a single NaN row, which value_counts() ignores.
zkp_repos_exploded = zkp_repos.explode('Tool')
zkp_repos_exploded.reset_index(drop=True, inplace=True)

In [None]:
# Number of exploded rows that reference each tool name.
tool_counts = zkp_repos_exploded['Tool'].value_counts().reset_index()
tool_counts.columns = ['Tool', 'AppCount']

# Map tool names to repository IDs through the rows typed 'Tool'. This is a
# left join, so a name without a matching tool repository keeps a NaN UniqueID.
tool_rows = zkp_repos[zkp_repos['Type'] == 'Tool']
tool_counts = tool_counts.merge(
    tool_rows,
    how='left',
    left_on='Tool',
    right_on='Name',
)
tool_counts = tool_counts[['UniqueID', 'AppCount']]
tool_counts

#### App Count & Issue Resolution

In [None]:

# Issue resolution rate per repository = closed / (closed + open).
# One row per repository, one column per issue state (missing combinations -> 0).
issue_counts = (
    tool_issues
    .groupby(['UniqueID', 'State'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)
# NOTE(review): the labels are assigned positionally, so this relies on 'State'
# having exactly two values that sort as closed-then-open — confirm.
issue_counts.columns = ['UniqueID', 'Closed', 'Open']

all_issues = issue_counts['Closed'] + issue_counts['Open']
issue_counts['IssueResolutionRate'] = issue_counts['Closed'] / all_issues
issue_counts = issue_counts.sort_values('IssueResolutionRate', ascending=False)

# Outer join keeps repositories that appear on only one side.
resolution_rates = issue_counts[['UniqueID', 'IssueResolutionRate']]
merged_tools_issues = resolution_rates.merge(tool_counts, on='UniqueID', how='outer')
merged_tools_issues = merged_tools_issues.sort_values('AppCount')

fig = px.scatter(
    merged_tools_issues,
    x='IssueResolutionRate',
    y='AppCount',
    color='UniqueID',
    template="plotly_dark",
    title='Application Usage and Issue Resolution Rate',
)

fig.show()

#### App Count & Issues Opened per Month

In [None]:
# Average number of issues opened per month for each repository.
# NOTE: tool_issues itself is modified here: 'CreatedAt' is parsed to datetime
# and a 'CreatedYearMonth' period column is added to it.
tool_issues['CreatedAt'] = pd.to_datetime(tool_issues['CreatedAt'])
tool_issues['CreatedYearMonth'] = tool_issues['CreatedAt'].dt.to_period('M')

# Issues opened per (repository, month). Only months with at least one issue
# produce a row, so the mean below is taken over those months only.
opened_by_month = (
    tool_issues[['UniqueID', 'CreatedYearMonth']]
    .rename(columns={'CreatedYearMonth': 'YearMonth'})
    .groupby(['UniqueID', 'YearMonth'])
    .size()
    .reset_index(name='OpenedCount')
)

monthly_opened_issues = (
    opened_by_month
    .groupby('UniqueID')['OpenedCount']
    .mean()
    .reset_index(name='AverageOpenedPerMonth')
    .merge(tool_counts, on='UniqueID', how='left')
)

fig = px.scatter(
    monthly_opened_issues,
    x='AverageOpenedPerMonth',
    y='AppCount',
    color='UniqueID',
    template="plotly_dark",
    title='Application Usage and Issues Opened per Month',
)

fig.show()

In [None]:
# Average number of issues closed per month for each repository.
#
# The month must come from the closing timestamp. Bucketing by 'CreatedAt'
# (as this cell used to do) just reproduces the "opened per month" figures.
# NOTE(review): assumes tool_issues.csv carries the closing timestamp in a
# 'ClosedAt' column, named like its 'CreatedAt' sibling — confirm against the export.
if 'ClosedAt' not in tool_issues.columns:
    raise KeyError(
        "tool_issues has no 'ClosedAt' column; cannot compute issues closed per month"
    )

closed_at = pd.to_datetime(tool_issues['ClosedAt'], utc=True)
monthly_closed_issues = pd.DataFrame({
    'UniqueID': tool_issues['UniqueID'],
    'YearMonth': closed_at.dt.to_period('M'),
})
# Issues without a closing timestamp (still open) have no closing month.
monthly_closed_issues = monthly_closed_issues.dropna(subset=['YearMonth'])

# Issues closed per (repository, month), then the mean over the months in
# which at least one issue was closed.
monthly_closed_issues = monthly_closed_issues.groupby(['UniqueID', 'YearMonth']).size().reset_index(name='ClosedCount')
monthly_closed_issues = monthly_closed_issues.groupby('UniqueID')['ClosedCount'].mean().reset_index(name='AverageClosedPerMonth')
monthly_closed_issues = monthly_closed_issues.merge(tool_counts, on='UniqueID', how='left')

fig = px.scatter(monthly_closed_issues, x='AverageClosedPerMonth', y='AppCount',
                color='UniqueID',
                template="plotly_dark",
                title='Application Usage and Issues Closed per Month')

fig.show()

#### App Count & Language

In [None]:
# AppCount of every tool repository, coloured by its 'Language'.
# Right join: every row typed 'Tool' is kept, even when it has no AppCount.
tool_rows = zkp_repos[zkp_repos['Type'] == 'Tool']
language_tool_counts = tool_counts.merge(tool_rows, on='UniqueID', how='right')[['UniqueID', 'AppCount', 'Language']]

fig = px.scatter(language_tool_counts, x='UniqueID', y='AppCount',
                color='Language',
                template="plotly_dark",
                # The title previously said "Issue Resolution Rate", copied from another cell.
                title='Application Usage and Language')

fig.show()

#### App Count & Contributors

In [None]:
# Distinct contributors per repository, plotted against AppCount.
# Only repositories that appear in tool_counts are kept; note that this
# rebinds repo_contributors to the filtered frame.
tracked_ids = tool_counts['UniqueID']
repo_contributors = repo_contributors[repo_contributors['UniqueID'].isin(tracked_ids)]

contributor_counts = (
    repo_contributors
    .groupby('UniqueID')['Contributor']
    .nunique()
    .reset_index()
    .rename(columns={'Contributor': 'ContributorCount'})
    .merge(tool_counts, on='UniqueID')
    .sort_values(by=['ContributorCount'], ascending=False)
)

fig = px.scatter(
    contributor_counts,
    x='ContributorCount',
    y='AppCount',
    color='UniqueID',
    template="plotly_dark",
    title='Application Usage and Contributor Count',
    labels={'ContributorCount': 'No. of Contributors', 'UniqueID': 'Repository'},
)
fig.show()

#### App Count & Tool Age

In [None]:
# Age of every tool repository in whole days (now minus 'Created'), plotted
# against AppCount.
# .copy() gives an independent frame, so the column assignments below neither
# trigger SettingWithCopyWarning nor depend on view/copy semantics.
tool_age = zkp_repos[zkp_repos['Type'] == 'Tool'].copy()
# utc=True always yields timezone-aware values, which is required for the
# subtraction from the timezone-aware "now" below.
tool_age['Created'] = pd.to_datetime(tool_age['Created'], utc=True)
tool_age['Age'] = (datetime.now(timezone.utc) - tool_age['Created']).dt.days
tool_age = tool_age[['UniqueID', 'Age']]
tool_age = tool_age.merge(tool_counts, on='UniqueID', how='left')

fig = px.scatter(tool_age, x='Age', y='AppCount',
             title='Application Usage and Age',
             template="plotly_dark",
             color='UniqueID',
             labels={'UniqueID': 'Repository'}
             )
fig.show()

#### App Count & Commit Count

In [None]:
# Number of distinct commit hashes per repository, plotted against AppCount.
# Right join: every tool_counts row is kept, with NaN CommitCount when the
# repository has no commits in tool_commits.
total_commits = (
    tool_commits
    .groupby('UniqueID')['CommitHash']
    .nunique()
    .reset_index(name='CommitCount')
    .merge(tool_counts, on='UniqueID', how='right')
)

fig = px.scatter(
    total_commits,
    x='CommitCount',
    y='AppCount',
    color='UniqueID',
    template="plotly_dark",
    title='Application Usage and CommitCount',
    labels={'UniqueID': 'Repository'},
)
fig.show()

#### App Count & Commit Frequency

In [None]:
# Average number of commits per month for each repository, plotted against
# AppCount.
# NOTE: tool_commits itself is modified here: 'CommitterDate' is parsed to
# UTC datetimes and a 'YearMonth' period column is added to it.
tool_commits['CommitterDate'] = pd.to_datetime(tool_commits['CommitterDate'], utc=True)
tool_commits['YearMonth'] = tool_commits['CommitterDate'].dt.to_period('M')

# Commits per (repository, month). Only months with at least one commit
# produce a row, so the mean below is taken over those months only.
commit_frequency = tool_commits.groupby(['UniqueID', 'YearMonth']).size().reset_index(name='CommitCount')
commit_frequency = commit_frequency.groupby('UniqueID')['CommitCount'].mean().reset_index(name='AverageCommitFrequency')
commit_frequency = commit_frequency.merge(tool_counts, on='UniqueID', how='left')

fig = px.scatter(commit_frequency, x='AverageCommitFrequency', y='AppCount',
             title='Application Usage and CommitFrequency',
             template="plotly_dark",
             color='UniqueID',
             labels={'UniqueID': 'Repository'}
             )
fig.show()

#### App Count & Branch Count

In [None]:
# Number of rows in branches_data per repository, plotted against AppCount.
branches_per_repo = branches_data.groupby('UniqueID').size()
branch_count = (
    branches_per_repo
    .reset_index(name='BranchCount')
    .merge(tool_counts, on='UniqueID', how='left')
)

fig = px.scatter(
    branch_count,
    x='BranchCount',
    y='AppCount',
    color='UniqueID',
    template="plotly_dark",
    title='Application Usage and BranchCount',
    labels={'UniqueID': 'Repository'},
)
fig.show()

#### App Count & Active Committers

In [None]:
# Number of distinct committers with at least one commit in 2023, per tool
# repository, plotted against AppCount.
# NOTE: tool_commits['CommitterDate'] is converted to UTC datetimes in place.
tool_commits['CommitterDate'] = pd.to_datetime(tool_commits['CommitterDate'], utc=True)
commits_2023 = tool_commits.loc[tool_commits['CommitterDate'].dt.year == 2023]

# Count distinct committers, not commit rows: size() would report the number
# of commits and give 1 to tools with no 2023 activity at all.
active_committers = (
    commits_2023
    .groupby('UniqueID')['Committer']
    .nunique()
    .reset_index(name='ActiveCommitters')
)

# Keep every tool repository; those without 2023 commits get 0.
tool_ids = zkp_repos.loc[zkp_repos['Type'] == 'Tool', ['UniqueID']].dropna().drop_duplicates()
active_committers = active_committers.merge(tool_ids, on='UniqueID', how='right')
active_committers['ActiveCommitters'] = active_committers['ActiveCommitters'].fillna(0)
active_committers = active_committers.merge(tool_counts, on='UniqueID', how='left')

fig = px.scatter(active_committers, x='ActiveCommitters', y='AppCount',
             title='Application Usage and Active Committers',
             template="plotly_dark",
             color='UniqueID',
             labels={'UniqueID': 'Repository'}
             )
fig.show()

#### App Count & New Committers

In [None]:
# Number of committers whose first-ever commit to a repository happened in
# 2023, per repository, plotted against AppCount.
# NOTE: tool_commits['CommitterDate'] is converted to UTC datetimes in place.
tool_commits['CommitterDate'] = pd.to_datetime(tool_commits['CommitterDate'], utc=True)

# The first commit of each (repository, committer) pair must be computed over
# the full history. Filtering to 2023 beforehand would make every committer
# active in 2023 look "new".
first_commits = tool_commits.groupby(['UniqueID', 'Committer'])['CommitterDate'].min().reset_index()
new_committers = first_commits[first_commits['CommitterDate'].dt.year == 2023]
new_committers = new_committers.groupby('UniqueID')['Committer'].count().reset_index(name='NewCommitterCount')
# Right join keeps every tool_counts row; repositories without new committers get 0.
new_committers = new_committers.merge(tool_counts, on='UniqueID', how='right').fillna(0)

fig = px.scatter(new_committers, x='NewCommitterCount', y='AppCount',
             title='Application Usage and New Committers',
             template="plotly_dark",
             color='UniqueID',
             labels={'UniqueID': 'Repository'}
             )
fig.show()

#### App Count & Recent Commits

In [None]:
# Number of commits made in 2023 per repository, plotted against AppCount.
# NOTE: tool_commits['CommitterDate'] is converted to UTC datetimes in place.
tool_commits['CommitterDate'] = pd.to_datetime(tool_commits['CommitterDate'], utc=True)

new_commits = tool_commits[tool_commits['CommitterDate'].dt.year == 2023]
new_commits = new_commits.groupby('UniqueID').size().reset_index(name='NewCommits')
# Right join keeps every tool_counts row; repositories without 2023 commits get 0.
new_commits = new_commits.merge(tool_counts, on='UniqueID', how='right').fillna(0)

fig = px.scatter(new_commits, x='NewCommits', y='AppCount',
             # Title typo fixed ("Committs" -> "Commits").
             title='Application Usage and New Commits',
             template="plotly_dark",
             color='UniqueID',
             labels={'UniqueID': 'Repository'}
             )
fig.show()