In [1]:
import time
from datetime import date
from datetime import datetime
from datetime import timezone

import pandas as pd
import requests
import seaborn as sns

In [2]:
# Load the GraphQL query text; the context manager closes the file handle.
with open('main_query.txt', 'r') as query_file:
    query = query_file.read()

# Filled by makeRequest with the nodes of every fetched page.
response_data = []

# Strip surrounding whitespace: a trailing newline in the token file would
# otherwise end up inside the Authorization header value.
with open('env', 'r') as token_file:
    github_token = token_file.read().strip()

headers = { 'Authorization': f'bearer {github_token}' }

In [None]:
def makeRequest(query, max_pages=10, max_retries=3, timeout=30):
    """Fetch paginated search results from the GitHub GraphQL endpoint.

    Posts ``query`` repeatedly and appends each page's ``nodes`` to the
    module-level ``response_data`` list. After every page, the cursor text
    inside ``query`` (initially the literal ``null``) is replaced by the
    quoted ``endCursor`` of that page so the next request continues from it.

    Args:
        query: GraphQL query text containing ``null`` where the cursor goes.
        max_pages: Maximum number of pages to fetch.
        max_retries: Number of retries allowed after a failed attempt before
            the function gives up. The counter resets after every success.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        None. Results accumulate in ``response_data``.
    """
    print('Start making query')
    page = 0
    failures = 0
    previous_cursor = 'null'
    while page < max_pages:
        print(f'Current page {page}')
        try:
            response = requests.post(
                'https://api.github.com/graphql',
                headers=headers,
                json={'query': query},
                timeout=timeout,
            )
        except requests.RequestException as error:
            print(f'Request failed: {error}')
            response = None

        if response is not None and response.status_code != 200:
            print(f'Some error has happened, status code:  {response.status_code}')
            response = None

        if response is None:
            # Bounded retry with a growing pause; the page counter does not
            # advance on failure, so without this limit the loop never ends.
            failures += 1
            if failures > max_retries:
                print(f'Giving up after {failures} failed attempts')
                break
            time.sleep(2 ** failures)
            continue
        failures = 0

        payload = response.json()
        search = (payload.get('data') or {}).get('search')
        if search is None:
            # A 200 response can still carry no data (e.g. a rejected query).
            print(f'Response contained no search data: {payload.get("errors")}')
            break

        page_info = search['pageInfo']
        end_cursor = page_info['endCursor']
        if end_cursor is not None:
            next_cursor = f'"{end_cursor}"'
            # NOTE(review): str.replace swaps every occurrence of the previous
            # cursor text, so the query should contain ``null`` only once.
            query = query.replace(previous_cursor, next_cursor)
            previous_cursor = next_cursor
            print(previous_cursor)

        response_data.extend(search['nodes'])
        print(f'Quantity of total nodes: {len(response_data)}')
        page += 1

        # Stop when there is nothing left to page through. ``hasNextPage`` is
        # only honoured when the query actually asks for it.
        if end_cursor is None or page_info.get('hasNextPage') is False:
            print('No more pages to fetch')
            break


makeRequest(query)

Start making query
Current page 0
"Y3Vyc29yOjEwMA=="
Quantity of total nodes: 100
Current page 1
"Y3Vyc29yOjIwMA=="
Quantity of total nodes: 200
Current page 2
"Y3Vyc29yOjMwMA=="
Quantity of total nodes: 300
Current page 3
"Y3Vyc29yOjQwMA=="
Quantity of total nodes: 400
Current page 4
"Y3Vyc29yOjUwMA=="
Quantity of total nodes: 500
Current page 5
"Y3Vyc29yOjYwMA=="
Quantity of total nodes: 600
Current page 6
"Y3Vyc29yOjcwMA=="
Quantity of total nodes: 700
Current page 7


In [None]:
def convertJsonToDataFrame(response_data):
    """Flatten a list of (possibly nested) JSON records into a DataFrame.

    Delegates to ``pd.json_normalize``, prints a progress message and
    returns the resulting frame.
    """
    flattened = pd.json_normalize(response_data)
    print('Finished converting json to csv')
    return flattened

# Build the working DataFrame from the nodes collected in response_data.
df = convertJsonToDataFrame(response_data)

In [None]:
# Preview the first rows of the normalized frame.
df.head()

In [None]:
def removeLastColumn(df):
    """Return a new DataFrame without the ``primaryLanguage`` column.

    The input frame is not modified. A frame that does not have the column
    is returned unchanged (as a new object), so the surrounding notebook
    cell can be re-run without raising ``KeyError``.

    Args:
        df: DataFrame to clean.

    Returns:
        A DataFrame with the ``primaryLanguage`` column removed if present.
    """
    return df.drop(columns='primaryLanguage', errors='ignore')
# Rebind df to the frame without the primaryLanguage column.
df = removeLastColumn(df)

In [None]:
# Preview again after removeLastColumn has been applied.
df.head()

In [None]:
def convertDateToAgeInDays(createdAt, today=None):
    """Return the number of days between an ISO-8601 timestamp and today.

    Args:
        createdAt: Timestamp string such as ``2020-01-31T12:00:00Z``. A
            trailing ``Z`` is rewritten to ``+00:00`` before parsing.
        today: Optional ``date`` to measure against. Defaults to the current
            UTC date so results do not depend on the machine's time zone.

    Returns:
        int: Whole days from the timestamp's date to ``today``. The value is
        negative when the timestamp lies after ``today``.
    """
    if today is None:
        # The timestamps carry a UTC offset, so "today" must be UTC as well;
        # a local date can differ by one day around midnight.
        today = datetime.now(timezone.utc).date()
    parsed = datetime.fromisoformat(createdAt.replace('Z', '+00:00'))
    if parsed.tzinfo is not None:
        # Normalise any explicit offset to UTC before taking the date.
        parsed = parsed.astimezone(timezone.utc)
    return (today - parsed.date()).days
    
# Derive day counts from the timestamp columns: 'age' from createdAt and
# 'lastUpdate' from updatedAt, both via convertDateToAgeInDays.
df['age'] = df['createdAt'].apply(convertDateToAgeInDays)
df['lastUpdate'] = df['updatedAt'].apply(convertDateToAgeInDays)

In [None]:
# Preview the frame with the age and lastUpdate columns.
df.head()

In [None]:
# Collects one issue percentage per DataFrame row.
percents = []

def calculateIssuesPercent(open_issues, closed_issues):
    """Return closed issues as a percentage of open issues.

    NOTE(review): the ratio is closed / open, not closed / (open + closed),
    so values above 100 are possible — confirm this is the intended metric.

    Args:
        open_issues: Count used as the denominator.
        closed_issues: Count used as the numerator.

    Returns:
        float: ``closed_issues / open_issues * 100`` rounded to two decimals.
        ``0.0`` when there are no closed issues. ``nan`` when the ratio is
        undefined because ``open_issues`` is zero while closed issues exist.
        A float is always returned so the resulting column stays numeric.
    """
    if closed_issues == 0:
        return 0.0
    if open_issues == 0:
        # Division by zero: report "undefined" instead of crashing.
        return float('nan')
    return round(closed_issues / open_issues * 100, 2)

# Walk the two issue counters in lockstep and append one percentage per row.
percents.extend(
    calculateIssuesPercent(open_count, closed_count)
    for open_count, closed_count in zip(
        df['open_issues.totalCount'], df['closed_issues.totalCount']
    )
)

# Attach the percentages as a new column and preview the result.
df['percent_of_issues'] = percents
df.head()

In [None]:
def convertDataFrameToCsv(response_data, path='pandas.csv'):
    """Write a DataFrame to a CSV file.

    Args:
        response_data: The DataFrame to export. The parameter name is kept
            for backward compatibility with existing callers.
        path: Destination file; defaults to ``pandas.csv``.

    Returns:
        None.
    """
    # Export the frame that was passed in, not whatever global ``df`` holds.
    response_data.to_csv(path)
    # Report success only after the file has actually been written.
    print('Dataframe converted to CSV')

# Export the current DataFrame to disk.
convertDataFrameToCsv(df)

In [None]:
# Median of the issue percentage. The column may hold formatted strings
# rather than numbers, so coerce it first; values that cannot be parsed
# become NaN and are skipped by median().
percent_median = pd.to_numeric(df['percent_of_issues'], errors='coerce').median()
print(f' Median: {percent_median}')

# Box plot of the 'age' column; the returned axes object is kept in
# age_box_plot so the figure can be saved later.
age_box_plot = sns.boxplot(y='age', data=df, color='red', width=0.1, linewidth=0.8)
median = df['age'].median()
print(f' Median: {median}')

In [None]:
# Save the age box plot as a PNG.
# NOTE(review): assumes the images/ directory already exists — confirm.
age_box_plot.figure.savefig("images/question1_age.png")

In [None]:
# First quartile (0.25 quantile) of the 'age' column.
q1 = df['age'].quantile(0.25)


In [None]:
# Third quartile (0.75 quantile) of the 'age' column.
q3 = df['age'].quantile(0.75)

In [None]:
# Interquartile range: distance between the third and first quartiles.
iqr = q3 - q1

In [None]:
# Upper fence for 'age': Q3 + 1.5 * IQR.
# Stored under a descriptive name instead of `max`, which would shadow the
# builtin for every later cell in the kernel session.
age_upper_fence = q3 + (1.5 * iqr)
print(age_upper_fence)

In [None]:
# Lower fence for 'age': Q1 - 1.5 * IQR.
# Stored under a descriptive name instead of `min`, which would shadow the
# builtin for every later cell in the kernel session.
age_lower_fence = q1 - 1.5 * iqr
print(age_lower_fence)

In [None]:
# Box plot of the 'pullRequests.totalCount' column, followed by its median.
# Note that `median` is rebound here; it previously held the median of 'age'.
pullRequest = sns.boxplot(y='pullRequests.totalCount', data=df, color='red', width=0.1, linewidth=0.8)
median = df['pullRequests.totalCount'].median()
print(f' Median: {median}')