# Find the Repos Available in your Database, and What Repository Groups They Are In

## Connect to your database

In [1]:
import psycopg2
import pandas as pd 
import sqlalchemy as salc
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import datetime
import json
warnings.filterwarnings('ignore')

with open("config.json") as config_file:
    config = json.load(config_file)

database_connection_string = 'postgres+psycopg2://{}:{}@{}:{}/{}'.format(config['user'], config['password'], config['host'], config['port'], config['database'])

dbschema='augur_data'
engine = salc.create_engine(
    database_connection_string,
    connect_args={'options': '-csearch_path={}'.format(dbschema)})

### Retrieve Available Respositories

In [2]:
repolist = pd.DataFrame()

repo_query = salc.sql.text(f"""
            select repo_id, repo_name, programming_language, avg(code_complexity) as average_file_code_complexity, sum(total_lines) as lines_total 
            from 
            (
            select repo_id, max(rl_analysis_date), code_complexity, file_path, programming_language, total_lines
            from repo_labor, repo
            where repo_labor.repo_name = repo.repo_name
            group by repo_id, programming_language, code_complexity, file_path, total_lines  
            order by programming_language
            ) b group by repo_id, programming_language 
            order by repo_id, lines_total desc;   

    """)

repolist = pd.read_sql(repo_query, con=engine)

display(repolist)

repolist.dtypes

Unnamed: 0,repo_id,programming_language,average_file_code_complexity,lines_total
0,25430,Python,5.19,31144
1,25430,JSON,0.00,3317
2,25430,Markdown,0.00,676
3,25430,License,0.00,674
4,25430,YAML,0.00,489
...,...,...,...,...
305,25460,Makefile,2.50,28
306,25460,SQL,0.00,10
307,25460,YAML,0.00,5
308,25461,Markdown,0.00,100


repo_id                           int64
programming_language             object
average_file_code_complexity    float64
lines_total                       int64
dtype: object

# Still in development below this line. 

In [7]:
#declare all repo ids you would like to produce charts for
repo_set = {25430}

#can be set as 'competitors' or 'repo'
#'competitors' will group graphs by type, so it is easy to compare across repos
# 'repo' will group graphs by repo so it is easy to look at all the contributor data for each repo
display_grouping = 'repo'

#if display_grouping is set to 'competitors', enter the repo ids you do no want to alias, if 'display_grouping' is set to repo the list will not effect anything
not_aliased_repos = [25430]

#group_by can be set as 'month' or 'year'
group_by = 'month'

#requirements for a contributor to be considered a repeat contributor
time = 365
num_contributions_required = 5

#specify dates for filtering
#if the end_date is in the future, the end_date will default to the current_date
begin_date = '2019-10-01'
end_date = '2021-04-31'

save_files = False

In [12]:
def pie_chart(input_df, repo_id, title = " {}: repo_labor {} from {} to {}", programming_language ='', lines_total=1):
    
    if type(repo_id) == type(repo_list):
        repo_ids = repo_id
    else:
        repo_ids = [repo_id]
        
    for repo_id in repo_ids:
        output_notebook()
        
        #create a copy of contributor dataframe
        driver_df = input_df.copy()
        
        #filter dataframe by repo_id
        driver_df = driver_df.loc[driver_df['repo_id'] == repo_id]
        


 
        #turn dict 'x' into a dataframe with columns 'contributor_type', and 'counts'
        data = pd.Series(x).reset_index(name='counts').rename(columns={'index':'programming_language'})

        data['angle'] = data['counts']/data['counts'].sum() * 2*pi
        data['color'] = ('#0072B2', '#E69F00')
        data['percentage'] = ((data['angle']/(2*pi))*100).round(2)
        
        #format title 
        title = title.format(repo_dict[repo_id], drive_by_contributors + repeat_contributors, begin_date, end_date)
        title_text_font_size = 18
        
        plot_width = 850
        
        #sets plot_width to width of title if title is wider than 850 pixels
        if len(title) * title_text_font_size / 2 > plot_width:
            plot_width = int(len(title) * title_text_font_size / 2)
        
        
        source = ColumnDataSource(data)
        
        #creates plot for chart
        p = figure(plot_height=450, plot_width =plot_width, title=title, 
                   toolbar_location=None, x_range=(-0.5, 1.3), tools = 'hover', tooltips = "@contributor_type", margin = (0, 0, 0, 0))

        wedge = p.wedge(x=0.87, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color=None, fill_color='color', legend_field='contributor_type', source=data)

        start_point = 0.88
        for i in range(0, len(data['percentage'])):
            #percentages
            p.add_layout(Label(x=-0.17, y= start_point + 0.13*(len(data['percentage']) - 1 - i), text='{}%'.format(data.iloc[i]['percentage']), 
                        render_mode='css', text_font_size = '15px', text_font_style= 'bold'))

            #contributors
            p.add_layout(Label(x=0.12, y= start_point + 0.13*(len(data['percentage']) - 1 - i), text='{}'.format(data.iloc[i]['counts']), 
                        render_mode='css', text_font_size = '15px', text_font_style= 'bold'))

        #put graph and caption plot together into one grid
        grid = gridplot([[plot], [caption_plot]])

        show(grid)
        
        if save_files:
            output_file = 'images/' + 'pie_chart' + '_' + repo_dict[repo_id] + '.png'
            export_png(grid, filename=output_file)

In [13]:

if display_grouping == 'repo':
    for repo_id in repo_set:
        pie_chart(repolist, repo_id=repo_id, programming_language = programming_language, lines_total = lines_total)
        
elif display_grouping == 'competitors':
    pie_chart(df, repo_id=repo_list, required_contributions = num_contributions_required, required_time = time)


NameError: name 'programming_language' is not defined