# Sample Analysis Using GitHub Event Data

In [None]:
import pandas as pd
from fidap import fidap_client
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import config

In [None]:
# Build the Fidap client that every query cell below goes through; the API key
# is read from the local `config` module rather than written into the notebook.
fidap = fidap_client(
    api_key=config.FIDAP_API_KEY,
    db="sf_gcp",
)

# GitHub Event Questions

## How many PRs were made in the current month of February?

In [None]:
# Count every PullRequestEvent row in the GitHub events table.
count_prs = fidap.sql(
    """select count(*) as number_of_prs from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PullRequestEvent'"""
)
# An un-grouped count(*) yields one row with one column, so [0][0] is the total.
total_prs = count_prs.values[0][0]
print(f'There were {total_prs} PRs made in this month so far.')

## Which users made the most PRs this month (excluding bots and users with "bot" in their names)?

In [None]:
# Rank users by their number of PullRequestEvent rows, most active first.
# Bot accounts are dropped with two patterns, '%bot%' and '%Bot%', which is the
# same filter the push ranking further down applies. LIKE matching is
# case-sensitive, so '%bot%' on its own lets names such as "SomeBot" through.
users_prs = fidap.sql(
    """select count(*) as number_of_prs, "username" from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PullRequestEvent' and "username" not like '%bot%' and "username" not like '%Bot%' group by "username" order by number_of_prs desc;"""
)

### Table of Top 5 Users

In [None]:
# Show the five users with the most PRs (the query already sorts descending).
users_prs.head(n=5)

### WordCloud of Top 50 Users

In [None]:
# Map each username to its PR count. Rows unpack as (count, username),
# following the column order of the SELECT that produced `users_prs`.
pr_counts_by_user = {user: count for count, user in users_prs.values}

# max_words=50 caps the cloud at the 50 highest-frequency users.
wordcloud = WordCloud(
    background_color="white",
    max_font_size=100,
    max_words=50,
).generate_from_frequencies(frequencies=pr_counts_by_user)

# Render the cloud as an image with the axes hidden.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()

## How many PRs were made per day for the month?

In [None]:
# One row per calendar day: TO_DATE("created_at") buckets the events by date.
# Rows come back ordered by PR count (busiest day first), not by date.
day_prs = fidap.sql(
    """select count(*) as number_of_prs, TO_DATE("created_at") as "date_created" from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PullRequestEvent' group by "date_created" order by number_of_prs desc;"""
)

In [None]:
# Parse the date column as epoch milliseconds, then store it back as
# 'YYYY-MM-DD' strings for display and plotting.
# NOTE(review): not idempotent -- re-running this cell feeds the formatted
# strings back into to_datetime(unit='ms').
created_on = pd.to_datetime(day_prs['date_created'], unit='ms')
day_prs['date_created'] = created_on.dt.strftime('%Y-%m-%d')

### Table of Daily PR Count

In [None]:
# Display the per-day PR counts as this cell's output.
day_prs

### Bar Chart to Show Tallies

In [None]:
# The query orders rows by PR count, which would scatter the dates along the
# x-axis. Plot a date-sorted copy so the bars read left to right in calendar
# order; the 'YYYY-MM-DD' strings sort chronologically. `day_prs` itself is
# left untouched, so the table keeps its count ordering.
ax = day_prs.sort_values('date_created').plot.bar(
    x='date_created', y='number_of_prs', rot=90, figsize=(10, 5)
)

## Which user made the most PRs to a particular repo this month?

In [None]:
# Rank (user, repo) pairs by their number of PullRequestEvent rows.
# Bot accounts are dropped with two patterns, '%bot%' and '%Bot%', which is the
# same filter the push ranking further down applies. LIKE matching is
# case-sensitive, so '%bot%' on its own lets names such as "SomeBot" through.
# NOTE: this rebinds `users_prs` to a three-column frame (count, username,
# repo_name). Cells that expect the earlier two-column per-user result must
# re-run their own query before being re-executed.
users_prs = fidap.sql(
    """select count(*) as number_of_prs, "username", "repo_name" from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PullRequestEvent' and "username" not like '%bot%' and "username" not like '%Bot%' group by "repo_name", "username" order by number_of_prs desc;"""
)

### Table of Top 5 User/Repo Pairs

In [None]:
# Show the five (user, repo) pairs with the most PRs.
users_prs.head(n=5)

## How many Code Pushes were made in the current month of February?

In [None]:
# Count every PushEvent row in the GitHub events table. The result column is
# aliased number_of_pushes so the frame is labelled for what it holds (pushes,
# not PRs), matching the alias used by the per-user push ranking.
count_push = fidap.sql(
    """select count(*) as number_of_pushes from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PushEvent'"""
)
# The count is read by position, so it does not depend on the column alias.
print(f'There were {count_push.values[0][0]} code pushes made in this month so far.')

## Which users made the most code pushes to any repo?

In [None]:
# Rank users by their number of PushEvent rows, most active first. Usernames
# containing "bot" or "Bot" are excluded.
users_pushes = fidap.sql(
    """select count(*) as number_of_pushes, "username" from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PushEvent' and "username" not like '%bot%' and "username" not like '%Bot%' group by "username" order by number_of_pushes desc;"""
)

### Table of Top Users

In [None]:
# Show the five users with the most pushes (the query already sorts descending).
users_pushes.head(n=5)

### Word Cloud of Top Code Pushers

In [None]:
# Map each username to its push count. Rows unpack as (count, username),
# following the column order of the SELECT that produced `users_pushes`.
push_counts_by_user = {user: count for count, user in users_pushes.values}

# max_words=50 caps the cloud at the 50 highest-frequency users.
wordcloud = WordCloud(
    background_color="white",
    max_font_size=100,
    max_words=50,
).generate_from_frequencies(frequencies=push_counts_by_user)

# Render the cloud as an image with the axes hidden.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()

## Which repositories had the most PRs (most requested changes) made in the month?

In [None]:
# Rank repositories by their number of PullRequestEvent rows, busiest first.
repo_prs = fidap.sql(
    """select "repo_name", count(*) as number_of_prs from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'PullRequestEvent' group by "repo_name" order by number_of_prs desc;"""
)

### Top 10 Repos table

In [None]:
# Show the ten repositories with the most PRs.
repo_prs.head(n=10)

## Which Repositories were starred the most in the month?

In [None]:
# Count WatchEvent rows per repository (this notebook's measure of stars),
# most-starred first.
repo_stars = fidap.sql(
    """select "repo_name", count(*) as number_of_stars from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'WatchEvent' group by "repo_name" order by number_of_stars desc;"""
)

In [None]:
# Keep only the 30 most-starred repositories for the summary and chart below.
repo_star_30 = repo_stars.head(n=30)

In [None]:
# The first row is the most-starred repository because the query orders by
# star count descending; its fields are (repo_name, number_of_stars).
top_repo = repo_star_30.values[0]
print(f'{top_repo[0]} was the most popular repository and was starred {top_repo[1]} times in the month to date')

### Most Starred/Popular Repos Bar Chart (Top 30)

In [None]:
# Bar chart of star counts for the 30 most-starred repositories; x-axis labels
# are rotated 90 degrees.
repo_star_30.plot.bar(
    x='repo_name',
    y='number_of_stars',
    figsize=(15, 7),
    rot=90,
)

### Top 10 Table

In [None]:
# Show the ten most-starred repositories.
repo_stars.head(n=10)

### Breakdown by day

In [None]:
# Stars (WatchEvent rows) per repository per calendar day, oldest day first.
repo_stars_day = fidap.sql(
    """select "repo_name", count(*) as number_of_stars, TO_DATE("created_at") as "date_created" from "FIDAP_DATA_SOURCES"."FIDAP_SCHEMA"."github" where "type" = 'WatchEvent' group by "repo_name", "date_created" order by "date_created" asc;"""
)

In [None]:
# Parse the date column as epoch milliseconds, then store it back as
# 'YYYY-MM-DD' strings for display.
# NOTE(review): not idempotent -- re-running this cell feeds the formatted
# strings back into to_datetime(unit='ms').
starred_on = pd.to_datetime(repo_stars_day['date_created'], unit='ms')
repo_stars_day['date_created'] = starred_on.dt.strftime('%Y-%m-%d')

In [None]:
# Display the per-repository, per-day star counts as this cell's output.
repo_stars_day