In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/github-activities-for-asa/github.csv


In [2]:
import random #generates random numbers 
from datetime import datetime #supplies classes for manipulating dates and times

import plotly #interactive, publication--quality graphing library
import plotly.express as px #plotly high level interface
import plotly.graph_objects as go #for hovering on point-like objects
from plotly.subplots import make_subplots #constructs figures with arbitrary subplots
from plotly.offline import init_notebook_mode, iplot, plot #to generate graphs offline and save them in local machine
from wordcloud import WordCloud, STOPWORDS #visualizes frequent words in a text in different frequencies
import seaborn as sns #visualization library based on matplotlib
import matplotlib.pyplot as plt #provides an object-oriented API for embedding plots into applications
%matplotlib inline 
#to enable inline plotting

In [3]:
df= pd.read_csv("/kaggle/input/github-activities-for-asa/github.csv") #load github data scrapped from the asalytics db

# **Github Analysis**

In [4]:
class github_analysis():
    '''
    A preliminary class that preprocesses asalytics github data to prepare it for 
    primary processing and further analysis
    '''
    def __init__(self, data): #this special method takes in the data, gets called automatically when any object of the github analysis class is created
        self.data= data
    
    def extract_date(self, date_columns:list): 
        '''this function inputs all columns with date related values, outputs a new 
        dataframe with all date related components extracted'''
        data= self.data
        data.loc[:,date_columns] = data.loc[:,date_columns].apply(pd.to_datetime, errors='coerce')
        for x in date_columns:
            data[x +'_day'] = data[x].dt.day
            data[x +'_month'] = data[x].dt.month
            data[x + '_year'] = data[x].dt.year
            data[x +'_weekday'] = data[x].dt.day_name()
        return data        

In [5]:
df= github_analysis(df).extract_date(['last_push_date', "date_created"]) #instantiation of the github analysis function. This returns a clean dataframe, ready for analysis
df.head()

Unnamed: 0,repo_name,repo_desc,date_created,last_push_date,language,no_of_forks,no_of_stars,no_of_watchers,no_of_contributors,no_of_commits,...,pull_requests,asa_id,last_push_date_day,last_push_date_month,last_push_date_year,last_push_date_weekday,date_created_day,date_created_month,date_created_year,date_created_weekday
0,ChoiceCoin/Algorand-Protocol,Repo for Algorand protocol software.,2022-01-25,2022-02-27,JavaScript,20,5,5,19,88,...,0,ChoiceCoin,27,2,2022,Sunday,25,1,2022,Tuesday
1,ChoiceCoin/asa-list,Tinyman - Algorand Standard Assets List,2022-02-16,2022-03-30,JavaScript,0,0,0,70,401,...,0,ChoiceCoin,30,3,2022,Wednesday,16,2,2022,Wednesday
2,ChoiceCoin/ASAlytics,Repo for ASAlytics.,2022-02-16,2022-04-11,Python,3,8,8,2,3,...,0,ChoiceCoin,11,4,2022,Monday,16,2,2022,Wednesday
3,ChoiceCoin/Choice-Charities,Repository for Choice Charities initiative.,2021-10-26,2021-10-26,Python,1,8,8,2,6,...,0,ChoiceCoin,26,10,2021,Tuesday,26,10,2021,Tuesday
4,ChoiceCoin/Choice-V1,Repository detailing Choice Coin's Creation an...,2021-07-23,2022-04-11,Python,243,266,266,3,41,...,6,ChoiceCoin,11,4,2022,Monday,23,7,2021,Friday


In [6]:
def choose_asa(df): #this function allows a user to select a specific asset to analyze
    asa = input("Select an ASA to analyze: ")
    df = df[df['asa_id'].str.contains(asa, case = False)]
    df.reset_index(inplace = True)
    return df

In [7]:
#allocation of data corresponding to each asa name
choice_data= df[df['asa_id']== 'ChoiceCoin']
algofi_data= df[df['asa_id']== 'Algofiorg']
shosha_data= df[df['asa_id']== 'ShoshaDev']

In [8]:
choice_data['repo_name'] = choice_data['repo_name'].str.lstrip("ChoiceCoin/")
algofi_data['repo_name'] = algofi_data['repo_name'].str.lstrip("Algofiorg/")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# **Github Visualizations**

In [9]:
class github_plot():
    '''
    this function uses python graphing libraries to gain key insights into asalytics github data
    through different graphical representations.
    '''
    def __init__(self, data): #this special method takes in the data, gets called automatically when any object of the github plot class is created
        self.data= data

    def activities_per_date(self):
        '''
        this function of the github plot class uses plotly subplots library in showing all the 
        github activities of a specified assets in rows and columns format
        The activities are;
        - Sum of number of valid contributions across all repositories per date.
        - Sum of number of valid commits across all repositories per date
        - Sum of number of stars across all repositoried per date
        - Sum of number of forks made across all repositoried per date
        - Sum of merged pull requests across all repositoried per date
        - sum of active issues created across all repositoried per date
        Note: Analyses are shown at different month intervals        
        '''
        data= self.data
        asa_name= self.data['asa_id'].mode()[0]

        fig = make_subplots(rows=3, cols=2, subplot_titles=("no_of_contributors", "no_of_commits", "no_of_forks",
                                                            "pull requests", "no_of_stars", "issues"))

        fig.add_trace(go.Scatter(x=data.reset_index()['last_push_date'], y=data.reset_index()['no_of_contributors'], stackgroup = 'one', 
                                 name= "no_of_contributors"), row=1, col=1)

        fig.add_trace(go.Scatter(x=data.reset_index()['last_push_date'], y=data.reset_index()['no_of_forks'], stackgroup = 'one',
                                 name= "no_of_forks"), row=2, col=1)

        fig.add_trace(go.Scatter(x=data.reset_index()['last_push_date'], y=data.reset_index()['no_of_stars'], stackgroup = 'one',
                                 name= "no_of_stars"), row=3, col=1)

        fig.add_trace(go.Scatter(x=data.reset_index()['last_push_date'], y=data.reset_index()['no_of_commits'], stackgroup = 'one',
                                 name= "no_of_commits"), row=1, col=2)

        fig.add_trace(go.Scatter(x=data.reset_index()['last_push_date'], y=data.reset_index()['pull_requests'], stackgroup = 'one',
                                 name= "pull requests"), row=2, col=2)
        fig.add_trace(go.Scatter(x=data.reset_index()['last_push_date'], y=data.reset_index()['issues'], stackgroup = 'one', 
                                 name= "issues"), row=3, col=2)

        fig.update_layout(height=800, title_text=f"{asa_name} Github Activities per Date")
        fig.update_layout(yaxis={'showgrid': True}
                      ,xaxis={'showgrid': True}
                      ,template='plotly_dark'
                      ,hovermode="x"
                      ,legend=dict(y=1, x=1, font=dict(size=10))
                      ,font=dict(size=12, color='white')
                      ,title={'y':0.95 ,'x':0.5,'xanchor': 'center','yanchor': 'top','font_size':20, 'font_color':'white'}) #sauce to improve plots' layout

        for i in range(1,7): #assign titles to x and y axes
            fig['layout']['xaxis{}'.format(i)]['title']='Date'
            fig['layout']['yaxis{}'.format(i)]['title']='Count'
        return iplot(fig) #this displays the final plot


    
    def set_activity(self, entity:str): 
        '''
        This function of the github plot class shows a time series representation of each 
        github activity using the plotly subplots library. 
        Trends are displayed in day of the week, weekday names, day of the month and year. 
        It inputs a specific activity of a specified asset, captures the sum of each data point, 
        represents them in dots, and outputs a line that traces and merges the dots.
        
        '''
        data= self.data
        asa_name= self.data['asa_id'].mode()[0]
        color_set = {"red", "green", "yellow", "blue", "magenta", "orange"}
        item= random.choice(tuple(color_set))

        def hbar(col):
            temp = data.groupby(col).agg({entity : "sum"}).reset_index()
            temp = temp.sort_values(col, ascending = False)
            c = {
                'y' : list(temp[entity]), 
                'x' : list(temp[col]), }
            trace = go.Scatter(y=c['y'], x=c['x'], marker=dict(color= item), text= entity)
            return trace
        
            layout = go.Layout(width=650)
            fig = go.Figure([trace], layout=layout)
            fig.update_xaxes(tickangle=45)
            fig.update_yaxes(tickangle=0)
            fig.show()
            
        trace1 = hbar('last_push_date_day') 
        trace2 = hbar('last_push_date_month') 
        trace3 = hbar('last_push_date_weekday') 
        trace4 = hbar('last_push_date_year') 

        titles = ['day of week', 'weekday', 'day of month', 'day of the year']
        titles = [f'{entity} per ' + _ for _ in titles]
        fig = make_subplots(rows=2, cols=2, subplot_titles = titles)

        fig.add_trace(trace1, row=1, col=1)
        fig.add_trace(trace2, row=2, col=1)
        fig.add_trace(trace3, row=1, col=2)
        fig.add_trace(trace4, row=2, col=2)

        fig.update_layout(height=500, hovermode="x", template= "plotly_dark", showlegend= False, title_text= f"{asa_name} {entity}",
                          font=dict(size=12, color='white'),
                          title={'y':0.95 ,'x':0.5,'xanchor': 'center','yanchor': 'top','font_size':20, 'font_color':'white'}) #sauce to improve plots' layout

        for i in range(1,5): #assign titles to x and y axes
            fig['layout']['xaxis{}'.format(i)]['title']='Date'
            fig['layout']['yaxis{}'.format(i)]['title']='Count'
        return iplot(fig) #this displays the final plots
    
    
    
    def activities_per_repo(self):
        '''
        this function of the github plot class uses plotly subplots library in showing all the 
        github activities of a specified assets in rows and columns format
        The activities are;
        - Sum of number of valid contributions per repository on a specified asset github account
        - Sum of number of valid commits per repository on a specified asset github account
        - Sum of number of stars per repository allocated to a specified asset github account
        - Sum of number of forks made per repository on a specified asset github account
        - Sum of merged pull requests per repository on a specified asset github account
        - sum of active issues created per repository on a specified asset github account
        Note: Analyses are shown at different month intervals        
        '''
        data= self.data
        color_set = {"red", "green", "blue", "magenta"}
        item= random.choice(tuple(color_set))
        
        def vbar(col):
            temp = data.groupby("repo_name").agg({col : "sum"}).reset_index()
            temp = temp.sort_values(col, ascending = False)
            c = {
                'x' : list(temp['repo_name'])[::-1], 
                'y' : list(temp[col])[::-1],
            }
            if len(data['repo_name'].index) >= 15:
                trace = go.Bar(x=[str(_) + "    " for _ in c['y']], y=c['x'], orientation= "h", marker=dict(color=item))
            else:
                trace = go.Bar(y=[str(_) + "    " for _ in c['y']], x=c['x'], orientation= "v", marker=dict(color=item))
                
            return trace 

            layout = go.Layout(width=650)
            fig = go.Figure([trace], layout=layout)
            fig.update_xaxes(tickangle=45)
            fig.update_yaxes(tickangle=0)
            fig.show()

        trace1 = vbar('no_of_commits') 
        trace2 = vbar('no_of_stars') 
        trace3 = vbar('no_of_forks') 
        trace4 = vbar('no_of_contributors')
        trace5= vbar("pull_requests")
        trace6= vbar("issues")

        titles = ['Commits', 'Stars', 'Forks', 'Contributors', "Pull Requests", "Issues"]
        titles = ['Count of ' + _ + " per Repo" for _ in titles]
        fig = make_subplots(rows=3, cols=2, subplot_titles = titles)

        fig.add_trace(trace1, row=1, col=1)
        fig.add_trace(trace2, row=1, col=2)
        fig.add_trace(trace3, row=2, col=1)
        fig.add_trace(trace4, row=2, col=2)
        fig.add_trace(trace5, row=3, col=1)
        fig.add_trace(trace6, row=3, col=2)
        asa_name= data['asa_id'].mode()[0]
        fig.update_layout(height=1000, title_text=f"{asa_name} Github Activities per Repo", template= "plotly_dark",
                          showlegend = False, hovermode="x unified",
                          legend=dict(y=1, x=1, font=dict(size=10)),
                  title={'y':0.95 ,'x':0.5,'xanchor': 'center','yanchor': 'top','font_size':20, 'font_color':'white'})       
        fig.show()
        

# **Choice Coin**


 Using the CHOICECOIN asset Github account as a Case Study.

## **GitHub Activities**
Analyzing the contributions of developers to the popular github code repositories of different Algorand assets is one of the crucial goal of ASAlytics. This analysis intends to give insights to these contributions, running across the commits count, contributors count, forks count, stars count, pull requests and issues counts.Hover on the interactive plots below to observe the trend on ChoiceCoin.

 Thereafter, we achieved this by plotting the count of occurences per activity per repository.

In [10]:
github_plot(choice_data).activities_per_repo()

Moreso, we achieved the plot below by plotting the count of occurences per activity per date. 

In [11]:
github_plot(choice_data).activities_per_date()

## **Trend of Number of Contributors Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of contributors of a specified asset. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of contributors per each date data. Hover on the interactive plots below to observe the trend of contributors for ChoiceCoin.

In [12]:
github_plot(choice_data).set_activity('no_of_contributors')

# **Trend of Number of Commits Per Date**

This individual plot of each activity intends to convey a pattern and show indepth increase and decrease in the number of commits on a specified asset. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of commits per each date data. Hover on the interactive plots below to observe the trend of commits on ChoiceCoin.

In [13]:
github_plot(choice_data).set_activity('no_of_commits')

# **Trend of Number of Forks Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of forks on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of forks per each date data. Hover on the interactive plots below to observe the trend of forks on ChoiceCoin.

In [14]:
github_plot(choice_data).set_activity('no_of_forks')

# **Trend of Number of Stars Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of stars on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of stars per each date data. Hover on the interactive plots below to observe the trend of stars on ChoiceCoin.

In [15]:
github_plot(choice_data).set_activity('no_of_stars')

# **Trend of Pull Request Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of pull requests on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of pull requests per each date data. Hover on the interactive plots below to observe the trend of pull requests of ChoiceCoin.

In [16]:
github_plot(choice_data).set_activity('pull_requests')

# **Trend of Issues Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of active issues on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of issues per each date data. Hover on the interactive plots below to observe the trend of issues of ChoiceCoin.

In [17]:
github_plot(choice_data).set_activity('issues')

# **AlgoFi**

Using the Algofiorg asset Github account as a Case Study.

## **GitHub Activities**

Analyzing the contributions of developers to the popular github code repositories of different Algorand assets is one of the crucial goal of ASAlytics. This analysis intends to give insights to these contributions, running across the commits count, contributors count, forks count, stars count, pull requests and issues counts. Hover on the interactive plots below to observe the trend for AlgofiOrg.

 Thereafter, we achieved this by plotting the count of occurences per activity per repository.

In [18]:
github_plot(algofi_data).activities_per_repo()

Moreso, we achieved this by plotting the count of occurences per activity per date.

In [19]:
github_plot(algofi_data).activities_per_date()

## **Trend of Number of Contributors Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of contributors of a specified asset. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of contributors per each date data. Hover on the interactive plots below to observe the trend of contributors for AlgofiOrg.

In [20]:
github_plot(algofi_data).set_activity('no_of_contributors')

# **Trend of Number of Commits Per Date**

This individual plot of each activity intends to convey a pattern and show indepth increase and decrease in the number of commits on a specified asset. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of commits per each date data. Hover on the interactive plots below to observe the trend of commits on Algofiorg.

In [21]:
github_plot(algofi_data).set_activity('no_of_commits')

# **Trend of Number of Stars Per Date**
​
This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of stars on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of stars per each date data. Hover on the interactive plots below to observe the trend of stars on AlgofiOrg.


In [22]:
github_plot(algofi_data).set_activity('no_of_stars')

# **Trend of Number of Forks Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of forks on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of forks per each date data. Hover on the interactive plots below to observe the trend of forks on AlgofiOrg.

In [23]:
github_plot(algofi_data).set_activity('no_of_forks')

# **Trend of Pull Request Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of pull requests on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of pull requests per each date data. Hover on the interactive plots below to observe the trend of pull requests of AlgofiOrg.

In [24]:
github_plot(algofi_data).set_activity('pull_requests')

# **Trend of Issues Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of issues on a specified asset github account. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of issues per each date data. Hover on the interactive plots below to observe the trend of issues of AlgofiOrg

In [25]:
github_plot(algofi_data).set_activity('issues')

In [26]:
#algofi_data['repo_desc'].fillna("Algofi", inplace=True)

# **Shoshadev**

Using the ShoshaDev asset Github account as a Case Study.


## **GitHub Activities**

Analyzing the contributions of developers to the popular github code repositories of different Algorand assets is one of the crucial goal of ASAlytics. This analysis intends to give insights to these contributions, running across the commits count, contributors count, forks count, stars count, pull requests and issues counts. Thereafter, we achieved this by plotting the count of occurences per activity per date. Hover on the interactive plots below to observe the trend for ShoshaDev.

In [27]:
github_plot(shosha_data).activities_per_date()

## **Trend of Number of Contributors Per Date**

This individual plot of each activity intends to convey a pattern and show radical increase and decrease in the number of contributors of a specified asset. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of contributors per each date data. Hover on the interactive plots below to observe the trend of contributors for ShoshaDev.

In [28]:
github_plot(shosha_data).set_activity('no_of_contributors')

# **Trend of Number of Commits Per Date**

This individual plot of each activity intends to convey a pattern and show indepth increase and decrease in the number of commits on a specified asset. It alternates per day of the week, per weekdays, per day of the month and year. Thereafter, we achieved this by plotting the count of commits per each date data. Hover on the interactive plots below to observe the trend of commits on ShoshaDev.

In [29]:
github_plot(shosha_data).set_activity('no_of_commits')