# Connect to `GitHub GraphQL` API

In [1]:
import requests

In [2]:
# Read the GitHub personal access token from a file kept outside the repository.
with open('../../../Auth_Keys/graphql_api_auth.txt') as file:
    # Strip surrounding whitespace: editors usually end the file with a newline,
    # and a newline inside an HTTP header value makes the request invalid.
    token = file.read().strip()

# Authorization header sent with every GraphQL request.
headers = {"Authorization": 'Bearer ' + token}

In [3]:
def run_query(query):
    """POST a GraphQL query string to the GitHub API and return the decoded JSON.

    Parameters
    ----------
    query : str
        Complete GraphQL document to send.

    Returns
    -------
    The JSON-decoded response body when the HTTP status is 200.

    Raises
    ------
    Exception
        For any other HTTP status; the message holds the status code and the query.
    """
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
    )

    # Guard clause: anything but 200 is treated as a failed query.
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    return response.json()

---

# Random Repos

In [4]:
import pandas as pd
# Load the pickled repository DataFrame; its `owner` and `name` columns are what
# the query cells below index into.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
df = pd.read_pickle('../../../Files/more_repos/df_repo_5000.pk1')

In [5]:
# Preview the rows from position 200 onward (an integer slice on a DataFrame is positional).
df[200:]

Unnamed: 0,createdAt,description,totalForks,hasIssuesEnabled,hasWikiEnabled,id,totalIssues,licenseInfo,name,nameWithOwner,readMe,owner,primaryLanguage,totalPullRequests,totalCommits,totalStargazers,updatedAt
200,2012-04-30 09:06:50+00:00,"A community owned and driven, enterprise fork ...",93,True,True,MDEwOlJlcG9zaXRvcnk0MTgxMjIy,112,NOASSERTION,mageplus,mageplus/mageplus,Mage+\n========\n\nA community owned and drive...,mageplus,PHP,48,212.0,385,2019-08-13 15:02:06+00:00
201,2018-09-07 03:43:00+00:00,SVScanner - Scanner Vulnerability And MaSsive ...,31,True,False,MDEwOlJlcG9zaXRvcnkxNDc3NjY5NjE=,0,Apache-2.0,SVScanner,radenvodka/SVScanner,# SVScanner - Scanner Vulnerability And MaSsiv...,radenvodka,PHP,0,31.0,119,2019-09-09 11:20:27+00:00
202,2009-09-27 20:32:15+00:00,This project aims to be the Scala Incubator pr...,49,True,True,MDEwOlJlcG9zaXRvcnkzMTkyOTY=,40,,scala-arm,jsuereth/scala-arm,# Scala Automatic Resource Management\n\n[![Jo...,jsuereth,Scala,40,178.0,521,2019-09-05 09:32:23+00:00
203,2009-12-06 12:05:24+00:00,The Linux Scheduler Simulator,9,True,True,MDEwOlJlcG9zaXRvcnk0MDE5NTI=,0,,LinSched,jontore/LinSched,,jontore,C,0,4.0,13,2019-03-23 23:20:12+00:00
204,2012-04-06 04:12:28+00:00,Houdini things!,22,True,True,MDEwOlJlcG9zaXRvcnkzOTQ2NzY3,12,,Houdini-Toolbox,captainhammy/Houdini-Toolbox,This repository is for the Houdini Toolbox.\n\...,captainhammy,Python,1,422.0,92,2019-09-09 15:09:56+00:00
205,2019-04-09 23:06:17+00:00,Repository from the article published in Medium,21,True,True,MDEwOlJlcG9zaXRvcnkxODA0NjI1MjI=,0,,oauth2-spring-boot,marcusdacoregio/oauth2-spring-boot,# oauth2-spring-boot\nThis repository has the ...,marcusdacoregio,Java,0,30.0,49,2019-09-04 12:01:40+00:00
206,2015-02-22 07:46:03+00:00,A web spider for zhihu.com,350,True,True,MDEwOlJlcG9zaXRvcnkzMTE1NjM0OQ==,4,MIT,zhihu-spider,MorganZhang100/zhihu-spider,"# zhihu-spider\nA web spider for zhihu.com, wh...",MorganZhang100,Python,1,11.0,711,2019-09-07 08:19:02+00:00
207,2013-03-05 21:35:36+00:00,A teeny tiny smooth scroll script with ease-in...,121,True,True,MDEwOlJlcG9zaXRvcnk4NTg5Njkx,21,MIT,smoothScroll,alicelieutier/smoothScroll,"smoothScroll\n============\n\nA teeny tiny, st...",alicelieutier,HTML,31,73.0,517,2019-08-13 15:17:09+00:00
208,2016-04-08 18:35:05+00:00,Список рекомендаций по неусложнению жизни себе...,101,True,False,MDEwOlJlcG9zaXRvcnk1NTgwMTI1Mw==,7,MIT,idiomatic-pre-CSS,nicothin/idiomatic-pre-CSS,,nicothin,HTML,6,,343,2019-09-06 20:03:42+00:00
209,2016-06-06 13:36:38+00:00,Native Python WFDB package,149,True,True,MDEwOlJlcG9zaXRvcnk2MDUzMDY2Nw==,155,MIT,wfdb-python,MIT-LCP/wfdb-python,,MIT-LCP,Jupyter Notebook,22,592.0,290,2019-09-07 18:08:56+00:00


---

---

# `Pull Requests`

## Query x2

In [6]:
# Single source of truth for the pull-request query. The first-page query and
# the follow-up query differ only in the arguments given to `pullRequests`, so
# both are derived from this template by substituting the @PAGE_ARGS@ marker.
# Braces are doubled because the final strings are filled in with str.format().
_PR_QUERY_TEMPLATE = '''
{{
  repositoryOwner(login: "{owner}") {{
    repository(name: "{name}") {{
      pullRequests(@PAGE_ARGS@) {{
        pageInfo {{
          endCursor
          hasNextPage
        }}
        nodes {{
          createdAt
          updatedAt
          closedAt
          title
          mergedBy {{
            login
          }}
          author {{
            login
            ... on User {{
              company
            }}
          }}
          authorAssociation
          files {{
            totalCount
          }}
          state
          resourcePath
          bodyText
          comments(first: 25) {{
            totalCount
            nodes {{
              author {{
                login
                ... on User {{
                  company
                }}
              }}
              authorAssociation
              bodyText
            }}
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Initial query: first page of 50 pull requests.
pr_query = _PR_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first: 50')

# Subsequent queries: next page of 50, starting after the stored cursor.
pr_query_2 = _PR_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first: 50, after:"{end_cursor}"')

# Values substituted into the queries via str.format(**pr_variables).
pr_variables = dict(end_cursor="", owner="", name="")

## Functions to query pull requests

In [36]:
def pr_to_df(result):
    """Build a DataFrame from the pull-request nodes of a GraphQL response.

    `result` is the decoded response; the rows are taken from
    data -> repositoryOwner -> repository -> pullRequests -> nodes.
    """
    pull_requests = result['data']['repositoryOwner']['repository']['pullRequests']
    return pd.DataFrame(pull_requests['nodes'])

def pr_get_page_info(result):
    """Return `(endCursor, hasNextPage)` from the pull-request `pageInfo` of a response."""
    page_info = result['data']['repositoryOwner']['repository']['pullRequests']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

def pr_result_query(query):
    """Run one pull-request query and unpack the page it returns.

    Parameters
    ----------
    query : str
        A fully formatted pull-request GraphQL query.

    Returns
    -------
    tuple or None
        `(df_pr, has_cursor, limit)`: the page as a DataFrame, whether another
        page follows, and the remaining rate-limit budget. `None` when the
        response holds no repository (null `repositoryOwner` or `repository`).

    Side effects
    ------------
    Stores the page's end cursor in the module-level `pr_variables['end_cursor']`
    so the follow-up query continues from there.
    """
    result = run_query(query)

    # Check the owner node explicitly: previously a null `repositoryOwner` was
    # only "handled" by the TypeError raised when subscripting None.
    owner_node = result['data']['repositoryOwner']
    if owner_node is None or owner_node['repository'] is None:
        return None

    # Save to DF
    df_pr = pr_to_df(result)

    # The cursor is written only after a successful fetch, so a failed call
    # leaves the previous cursor in place.
    pr_variables['end_cursor'], has_cursor = pr_get_page_info(result)

    # Remaining rate-limit budget reported by the API
    limit = result['data']['rateLimit']['remaining']

    return df_pr, has_cursor, limit

---

---

# `Issues`

## Query x2

In [8]:
# Single source of truth for the issues query. The first-page query and the
# follow-up query differ only in the arguments given to `issues`, so both are
# derived from this template by substituting the @PAGE_ARGS@ marker.
# Braces are doubled because the final strings are filled in with str.format().
_IS_QUERY_TEMPLATE = '''
{{
  repositoryOwner(login:"{owner}") {{
    repository(name:"{name}") {{
      issues(@PAGE_ARGS@) {{
        pageInfo {{
          endCursor
          hasNextPage
        }}
        nodes {{
          createdAt
          closedAt
          updatedAt
          title
          number
          author {{
            login
            ... on User {{
              company
            }}
          }}
          authorAssociation
          state
          bodyText
          comments(first:20) {{
            totalCount
            nodes {{
              author {{
                login
                ... on User {{
                  company
                }}
              }}
              createdAt
              authorAssociation
              bodyText
            }}
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Initial query: first page of 50 issues.
is_query = _IS_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first:50')

# Subsequent queries: next page of 50, starting after the stored cursor.
is_query_2 = _IS_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first:50, after:"{end_cursor}"')

# Values substituted into the queries via str.format(**is_variables).
is_variables = dict(end_cursor="", owner="", name="")

## Functions to query issues

In [9]:
def is_to_df(result):
    """Build a DataFrame from the issue nodes of a GraphQL response.

    `result` is the decoded response; the rows are taken from
    data -> repositoryOwner -> repository -> issues -> nodes.
    """
    issues = result['data']['repositoryOwner']['repository']['issues']
    return pd.DataFrame(issues['nodes'])

def is_get_page_info(result):
    """Return `(endCursor, hasNextPage)` from the issues `pageInfo` of a response."""
    page_info = result['data']['repositoryOwner']['repository']['issues']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

def is_result_query(query):
    """Run one issues query and unpack the page it returns.

    Parameters
    ----------
    query : str
        A fully formatted issues GraphQL query.

    Returns
    -------
    tuple or None
        `(df_is, has_cursor, limit)`: the page as a DataFrame, whether another
        page follows, and the remaining rate-limit budget. `None` when the
        response holds no repository (null `repositoryOwner` or `repository`).

    Side effects
    ------------
    Stores the page's end cursor in the module-level `is_variables['end_cursor']`
    so the follow-up query continues from there.
    """
    result = run_query(query)

    # Check the owner node explicitly: previously a null `repositoryOwner` was
    # only "handled" by the TypeError raised when subscripting None.
    owner_node = result['data']['repositoryOwner']
    if owner_node is None or owner_node['repository'] is None:
        return None

    # Save to DF
    df_is = is_to_df(result)

    # The cursor is written only after a successful fetch, so a failed call
    # leaves the previous cursor in place.
    is_variables['end_cursor'], has_cursor = is_get_page_info(result)

    # Remaining rate-limit budget reported by the API
    limit = result['data']['rateLimit']['remaining']

    return df_is, has_cursor, limit

---

---

# `Commits`

## Query x2

In [56]:
# Single source of truth for the commit-history query. The first-page query and
# the follow-up query differ only in the arguments given to `history`, so both
# are derived from this template by substituting the @PAGE_ARGS@ marker.
# Braces are doubled because the final strings are filled in with str.format().
# NOTE(review): the branch is hard-coded to "master"; repositories whose history
# lives on a differently named branch presumably yield a null `ref` - confirm.
_CM_QUERY_TEMPLATE = '''
{{
  repository(owner: "{owner}", name: "{name}") {{
    ref(qualifiedName: "master") {{
      target {{
        ... on Commit {{
          history(@PAGE_ARGS@) {{
            pageInfo {{
              hasNextPage
              endCursor
            }}
            nodes {{
              oid
              messageHeadline
              committedDate
              committer {{
                user {{
                  login
                  company
                }}
              }}
              status {{
                id
                state
              }}
              associatedPullRequests(first: 3) {{
                nodes {{
                  id
                  title
                  author {{
                    login
                  }}
                  authorAssociation
                  createdAt
                  updatedAt
                  closedAt
                  number
                  state
                }}
              }}
            }}
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Initial query: first page of 100 commits.
cm_query = _CM_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first: 100')

# Subsequent queries: pages of 50 (not 100), starting after the stored cursor.
cm_query_2 = _CM_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first: 50, after:"{end_cursor}"')

# Values substituted into the queries via str.format(**cm_variables).
cm_variables = dict(end_cursor="", owner="", name="")

## Functions for Commits

In [50]:
def cm_to_df(result):
    """Build a DataFrame from the commit nodes of a GraphQL response.

    `result` is the decoded response; the rows are taken from
    data -> repository -> ref -> target -> history -> nodes.
    """
    history = result['data']['repository']['ref']['target']['history']
    return pd.DataFrame(history['nodes'])

def cm_get_page_info(result):
    """Return `(endCursor, hasNextPage)` from the commit-history `pageInfo` of a response."""
    page_info = result['data']['repository']['ref']['target']['history']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

def cm_result_query(query):
    """Run one commit-history query and unpack the page it returns.

    Parameters
    ----------
    query : str
        A fully formatted commit-history GraphQL query.

    Returns
    -------
    tuple or None
        `(df_commits, has_cursor, limit)`: the page as a DataFrame, whether
        another page follows, and the remaining rate-limit budget. `None` when
        the response holds no repository or no matching `ref`.

    Side effects
    ------------
    Stores the page's end cursor in the module-level `cm_variables['end_cursor']`
    so the follow-up query continues from there.
    """
    result = run_query(query)

    # A null `ref` (the queried branch does not exist) used to surface as a
    # TypeError from subscripting None further down; report it as "no data"
    # explicitly, the same way a missing repository is reported.
    repository = result['data']['repository']
    if repository is None or repository['ref'] is None:
        return None

    # Save to DF
    df_commits = cm_to_df(result)

    # The cursor is written only after a successful fetch, so a failed call
    # leaves the previous cursor in place.
    cm_variables['end_cursor'], has_cursor = cm_get_page_info(result)

    # Remaining rate-limit budget reported by the API
    limit = result['data']['rateLimit']['remaining']

    return df_commits, has_cursor, limit

---

---

# `Stargazers`

## Query x2

In [12]:
# Single source of truth for the stargazer query. The first-page query and the
# follow-up query differ only in the arguments given to `stargazers`, so both
# are derived from this template by substituting the @PAGE_ARGS@ marker.
# Braces are doubled because the final strings are filled in with str.format().
_ST_QUERY_TEMPLATE = '''
{{
  repositoryOwner(login: "{owner}") {{
    id
    login
    repository(name: "{name}") {{
      id
      name
      createdAt
      updatedAt
      description
      licenseInfo {{
        spdxId
      }}
      stargazers(@PAGE_ARGS@) {{
        totalCount
        pageInfo {{
          endCursor
          hasNextPage
        }}
        edges {{
          starredAt
          node {{
            createdAt
            updatedAt
            id
            login
            company
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Initial query: first page of 100 stargazers.
st_query = _ST_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first:100')

# Subsequent queries: next page of 100, starting after the stored cursor.
st_query_2 = _ST_QUERY_TEMPLATE.replace('@PAGE_ARGS@', 'first:100, after:"{end_cursor}"')

# Values substituted into the queries via str.format(**st_variables).
st_variables = dict(end_cursor="", owner="", name="")

## Functions for stargazers

In [13]:
def st_to_df(result):
    """Build a DataFrame from the stargazer edges of a GraphQL response.

    `result` is the decoded response; the rows are taken from
    data -> repositoryOwner -> repository -> stargazers -> edges.
    """
    stargazers = result['data']['repositoryOwner']['repository']['stargazers']
    return pd.DataFrame(stargazers['edges'])

def st_get_page_info(result):
    """Return `(endCursor, hasNextPage)` from the stargazers `pageInfo` of a response."""
    page_info = result['data']['repositoryOwner']['repository']['stargazers']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

def st_result_query(query):
    """Run one stargazer query and unpack the page it returns.

    Parameters
    ----------
    query : str
        A fully formatted stargazer GraphQL query.

    Returns
    -------
    tuple or None
        `(df_star, has_cursor, limit)`: the page as a DataFrame, whether another
        page follows, and the remaining rate-limit budget. `None` when the
        response holds no repository (null `repositoryOwner` or `repository`).

    Side effects
    ------------
    Stores the page's end cursor in the module-level `st_variables['end_cursor']`
    so the follow-up query continues from there.
    """
    result = run_query(query)

    # Check the owner node explicitly: previously a null `repositoryOwner` was
    # only "handled" by the TypeError raised when subscripting None.
    owner_node = result['data']['repositoryOwner']
    if owner_node is None or owner_node['repository'] is None:
        return None

    # Save to DF
    df_star = st_to_df(result)

    # The cursor is written only after a successful fetch, so a failed call
    # leaves the previous cursor in place.
    st_variables['end_cursor'], has_cursor = st_get_page_info(result)

    # Remaining rate-limit budget reported by the API
    limit = result['data']['rateLimit']['remaining']

    return df_star, has_cursor, limit

---

---

# `Run Queries`

In [17]:
import time

## Set index from the random repos

In [44]:
# Row label in `df` of the repository to process; later cells read df['owner'][i] and df['name'][i].
i = 200

## Make new Folder to store the repo data

In [45]:
import os

# Create the output folder '<owner>-<name>' for the repository at index `i`.
try:
    os.mkdir('../../../Files/more_repos2/' + df['owner'][i] + '-' + df['name'][i])
    print('Made Folder')
except FileExistsError:
    # Only "already exists" is expected. Any other failure (e.g. a missing
    # parent directory) now surfaces instead of being reported as "Folder exists".
    print('Folder exists')

Made Folder


## Run PR Query

In [14]:
import os
import time

In [15]:
def _pr_pickle_path():
    """Return the pickle path for the repository currently set in `pr_variables`."""
    owner, name = pr_variables['owner'], pr_variables['name']
    return ('../../../Files/more_repos2/pr/' + owner + '-' + name + '/'
            + owner + '_' + name + '_pr' + '.pk1')


def _pr_fetch_all(df_pr, has_cursor_pr, limit, max_retries, wait_seconds):
    """Fetch the remaining pull-request pages, then pickle the combined frame.

    A failed fetch is retried in place, so pages already collected are kept and
    the cursor stored in `pr_variables` stays in step with `df_pr`.
    """
    failures = 0

    # `limit >= 0` is kept from the original; the remaining budget is not
    # expected to go negative, so the cursor flag is what ends the loop.
    while has_cursor_pr and (limit >= 0):
        try:
            page = pr_result_query(pr_query_2.format(**pr_variables))
        except Exception as e:
            if failures >= max_retries:
                raise
            failures += 1
            print(e)
            print('except_loop')
            time.sleep(wait_seconds)
            continue

        failures = 0

        if page is None:
            # The query returned no repository: keep the pages fetched so far
            # rather than failing on the tuple unpack.
            print('No repository data returned; saving the pages fetched so far')
            break

        df_pr_2, has_cursor_pr, limit = page

        # Concat to existing df_pr
        df_pr = pd.concat([df_pr, df_pr_2])

        # Print limit and cursor
        print('Next limit is: ', limit)
        print('Next cursor: ', has_cursor_pr)
        print('Shape:', df_pr.shape)

    # Saving sits outside the retry: a bad path fails at once instead of being
    # retried forever.
    df_pr.to_pickle(_pr_pickle_path())


def pr_loop(df_pr, has_cursor_pr, limit):
    """Fetch all remaining pull-request pages and pickle the result, without retrying.

    Parameters
    ----------
    df_pr : pandas.DataFrame
        First page, as returned by `pr_result_query`.
    has_cursor_pr : bool
        Whether another page follows the first one.
    limit : int
        Remaining rate-limit budget reported with the first page.

    Raises
    ------
    Exception
        Whatever the page fetch or the pickle write raises.
    """
    _pr_fetch_all(df_pr, has_cursor_pr, limit, max_retries=0, wait_seconds=0)


def pr_looper(df_pr, has_cursor_pr, limit, max_retries=60, wait_seconds=60):
    """Like `pr_loop`, but retry a failed page fetch after a pause.

    The previous version restarted `pr_loop` from the first page's frame while
    the cursor in `pr_variables` had already advanced, so every page fetched
    before a failure was missing from the saved pickle. It also recursed
    without bound and retried a failing save forever.

    Parameters
    ----------
    df_pr, has_cursor_pr, limit
        As for `pr_loop`.
    max_retries : int
        Consecutive failed fetches tolerated before the error is re-raised.
        With the defaults that is about an hour of waiting.
    wait_seconds : int or float
        Pause between retries.
    """
    print('try_loop')
    _pr_fetch_all(df_pr, has_cursor_pr, limit, max_retries, wait_seconds)

In [42]:
%%time

for i in range(342, 1000):
    print('Number:', i)
    try:
        os.mkdir('../../../Files/more_repos2/pr/' + df['owner'][i] + '-' + df['name'][i])
        print('Made Folder: ', df['owner'][i] + '-' + df['name'][i])
    except:
        print('Folder exists')
        
    # Set owner and name for pr_variables by changing the index
    pr_variables['owner'] = df['owner'][i]
    pr_variables['name'] = df['name'][i]

    ##### Comment Out this section if there's a error #####
    # Run first query
    try:
        df_pr, has_cursor_pr, limit = pr_result_query(pr_query.format(**pr_variables))

        print('Starting limit is: ', limit)
        print('Start cursor: ', has_cursor_pr, '\n')
        
    except TypeError:
        print('Does not exist: ', df['owner'][i] + '-' + df['name'][i], '\n')
        continue

    ##### Comment Out this section if there's a error #####

    pr_looper(df_pr, has_cursor_pr, limit)

Number: 342
Made Folder kelseyhightower-kubernetes-the-hard-way
Starting limit is:  4986
Start cursor:  True 

try_loop
Next limit is:  4985
Next cursor:  True 

Shape: (100, 12)
Next limit is:  4984
Next cursor:  True 

Shape: (150, 12)
Next limit is:  4983
Next cursor:  True 

Shape: (200, 12)
Next limit is:  4982
Next cursor:  False 

Shape: (226, 12)
Number: 343
Made Folder jlipps-yiewd
Starting limit is:  4981
Start cursor:  False 

try_loop
Number: 344
Made Folder felis-USB_Host_Shield_2.0
Starting limit is:  4980
Start cursor:  True 

try_loop
Next limit is:  4979
Next cursor:  True 

Shape: (100, 12)
Next limit is:  4978
Next cursor:  False 

Shape: (128, 12)
Number: 345
Made Folder joinbyjoy-wechat_xcx_751
Starting limit is:  4977
Start cursor:  False 

try_loop
Number: 346
Made Folder USTCPCS-CVPR2018_attention
Starting limit is:  4976
Start cursor:  False 

try_loop
Number: 347
Made Folder github-balanced-employee-ip-agreement
Starting limit is:  4975
Start cursor:  False 



# Old Pr query

In [59]:
%%time

i=210

try:
    os.mkdir('../../../Files/more_repos2/pr' + df['owner'][i] + '-' + df['name'][i])
    print('Made Folder')
except:
    print('Folder exists')

# Set owner and name for pr_variables by changing the index
pr_variables['owner'] = df['owner'][i]
pr_variables['name'] = df['name'][i]

##### Comment Out this section if there's a error #####
# Run first query
df_pr, has_cursor_pr, limit = pr_result_query(pr_query.format(**pr_variables))

print('Starting limit is: ', limit)
print('Start cursor: ', has_cursor_pr, '\n')

##### Comment Out this section if there's a error #####

while has_cursor_pr and (limit >= 0):

    # Run next queries
    df_pr_2, has_cursor_pr, limit = pr_result_query(pr_query_2.format(**pr_variables))

    # Concat to existing df_star
    df_pr = pd.concat([df_pr, df_pr_2])

    # Print limit and cursor
    print('Next limit is: ', limit)
    print('Next cursor: ', has_cursor_pr, '\n')
    print('Shape:', df_pr.shape)

Made Folder
Starting limit is:  4977
Start cursor:  True 

Next limit is:  4976
Next cursor:  False 

Shape: (80, 12)
Wall time: 8.79 s


### Save PR as a pickle file

In [1519]:
# Write the accumulated pull-request frame to
# '../../../Files/more_repos2/<owner>-<name>/<owner>_<name>_pr.pk1', with owner
# and name taken from pr_variables.
df_pr.to_pickle('../../../Files/more_repos2/' + pr_variables['owner'] + '-' + pr_variables['name'] + '/' + pr_variables['owner'] 
                + '_' + pr_variables['name'] + '_pr' + '.pk1')

---

## Run Issues Query

In [None]:
import os
import time

def _is_pickle_path():
    """Return the pickle path for the repository currently set in `is_variables`."""
    owner, name = is_variables['owner'], is_variables['name']
    # The file name previously took the owner from `cm_variables` (a copy-paste
    # slip), so issue pickles were named after whatever the commits run had set.
    return ('../../../Files/more_repos2/is/' + owner + '-' + name + '/'
            + owner + '_' + name + '_is' + '.pk1')


def _is_fetch_all(df_is, has_cursor_is, limit, max_retries, wait_seconds):
    """Fetch the remaining issue pages, then pickle the combined frame.

    A failed fetch is retried in place, so pages already collected are kept and
    the cursor stored in `is_variables` stays in step with `df_is`.
    """
    failures = 0

    # `limit >= 0` is kept from the original; the remaining budget is not
    # expected to go negative, so the cursor flag is what ends the loop.
    while has_cursor_is and (limit >= 0):
        try:
            page = is_result_query(is_query_2.format(**is_variables))
        except Exception as e:
            if failures >= max_retries:
                raise
            failures += 1
            print(e)
            print('except_loop')
            time.sleep(wait_seconds)
            continue

        failures = 0

        if page is None:
            # The query returned no repository: keep the pages fetched so far
            # rather than failing on the tuple unpack.
            print('No repository data returned; saving the pages fetched so far')
            break

        df_is_2, has_cursor_is, limit = page

        # Concat to existing df_is
        df_is = pd.concat([df_is, df_is_2])

        # Print limit and cursor
        print('Next limit is: ', limit)
        print('Next cursor: ', has_cursor_is)
        print('Shape:', df_is.shape)

    # Saving sits outside the retry: a bad path fails at once instead of being
    # retried forever.
    df_is.to_pickle(_is_pickle_path())


def is_loop(df_is, has_cursor_is, limit):
    """Fetch all remaining issue pages and pickle the result, without retrying.

    Parameters
    ----------
    df_is : pandas.DataFrame
        First page, as returned by `is_result_query`.
    has_cursor_is : bool
        Whether another page follows the first one.
    limit : int
        Remaining rate-limit budget reported with the first page.

    Raises
    ------
    Exception
        Whatever the page fetch or the pickle write raises.
    """
    _is_fetch_all(df_is, has_cursor_is, limit, max_retries=0, wait_seconds=0)


def is_looper(df_is, has_cursor_is, limit, max_retries=60, wait_seconds=60):
    """Like `is_loop`, but retry a failed page fetch after a pause.

    The previous version restarted `is_loop` from the first page's frame while
    the cursor in `is_variables` had already advanced, so every page fetched
    before a failure was missing from the saved pickle. It also recursed
    without bound and retried a failing save forever.

    Parameters
    ----------
    df_is, has_cursor_is, limit
        As for `is_loop`.
    max_retries : int
        Consecutive failed fetches tolerated before the error is re-raised.
        With the defaults that is about an hour of waiting.
    wait_seconds : int or float
        Pause between retries.
    """
    print('try_loop')
    _is_fetch_all(df_is, has_cursor_is, limit, max_retries, wait_seconds)

In [None]:
%%time

for i in range(200, 1000):
    print('Number:', i)
    try:
        os.mkdir('../../../Files/more_repos2/is/' + df['owner'][i] + '-' + df['name'][i])
        print('Made Folder: ', df['owner'][i] + '-' + df['name'][i])
    except:
        print('Folder exists')
        
    # Set owner and name for is_variables by changing the index
    is_variables['owner'] = df['owner'][i]
    is_variables['name'] = df['name'][i]

    ##### Comment Out this section if there's a error #####
    # Run first query
    try:
        df_is, has_cursor_is, limit = is_result_query(is_query.format(**is_variables))

        print('Starting limit is: ', limit)
        print('Start cursor: ', has_cursor_is, '\n')
        
    except TypeError:
        print('Does not exist: ', df['owner'][i] + '-' + df['name'][i], '\n')
        continue

    ##### Comment Out this section if there's a error #####

    is_looper(df_is, has_cursor_is, limit)

## Old Issues Query

In [1521]:
# %%time

# # Set owner and name for pr_variables by changing the index
# is_variables['owner'] = df['owner'][i]
# is_variables['name'] = df['name'][i]

# ##### Comment Out this section if there's a error #####
# # Run first query
# df_is, has_cursor_is, limit = is_result_query(is_query.format(**is_variables))

# print('Starting limit is: ', limit)
# print('Start cursor: ', has_cursor_is, '\n')

# ##### Comment Out this section if there's a error #####

# # Run While Loop below
# while has_cursor_is and (limit >= 0):
    
#     # Run next queries
#     df_is_2, has_cursor_is, limit = is_result_query(is_query_2.format(**is_variables))

#     # Concat to existing df_star
#     df_is = pd.concat([df_is, df_is_2])
    
#     # Print limit and cursor
#     print('Next limit is: ', limit)
#     print('Next cursor: ', has_cursor_is, '\n')

Starting limit is:  4553
Start cursor:  False 

Wall time: 1.27 s


### Save Issues as a pickle file

In [1522]:
# df_is.to_pickle('../../../Files/more_repos/' + is_variables['owner'] + '-' + is_variables['name'] + '/' + is_variables['owner'] 
#                 + '_' + is_variables['name'] + '_is' + '.pk1')

---

## Run Commits Query

In [52]:
import os
import time

def _cm_pickle_path():
    """Return the pickle path for the repository currently set in `cm_variables`."""
    owner, name = cm_variables['owner'], cm_variables['name']
    return ('../../../Files/more_repos2/cm/' + owner + '-' + name + '/'
            + owner + '_' + name + '_cm' + '.pk1')


def _cm_fetch_all(df_cm, has_cursor_cm, limit, max_retries, wait_seconds):
    """Fetch the remaining commit pages, then pickle the combined frame.

    A failed fetch is retried in place, so pages already collected are kept and
    the cursor stored in `cm_variables` stays in step with `df_cm`.
    """
    failures = 0

    # `limit >= 0` is kept from the original; the remaining budget is not
    # expected to go negative, so the cursor flag is what ends the loop.
    while has_cursor_cm and (limit >= 0):
        try:
            page = cm_result_query(cm_query_2.format(**cm_variables))
        except Exception as e:
            if failures >= max_retries:
                raise
            failures += 1
            print(e)
            print('except_loop')
            time.sleep(wait_seconds)
            continue

        failures = 0

        if page is None:
            # The query returned no repository: keep the pages fetched so far
            # rather than failing on the tuple unpack.
            print('No repository data returned; saving the pages fetched so far')
            break

        df_cm_2, has_cursor_cm, limit = page

        # Concat to existing df_cm
        df_cm = pd.concat([df_cm, df_cm_2])

        # Print limit and cursor
        print('Next limit is: ', limit)
        print('Next cursor: ', has_cursor_cm)
        print('Shape:', df_cm.shape)

    # Saving sits outside the retry: a bad path fails at once instead of being
    # retried forever.
    df_cm.to_pickle(_cm_pickle_path())


def cm_loop(df_cm, has_cursor_cm, limit):
    """Fetch all remaining commit pages and pickle the result, without retrying.

    Parameters
    ----------
    df_cm : pandas.DataFrame
        First page, as returned by `cm_result_query`.
    has_cursor_cm : bool
        Whether another page follows the first one.
    limit : int
        Remaining rate-limit budget reported with the first page.

    Raises
    ------
    Exception
        Whatever the page fetch or the pickle write raises.
    """
    _cm_fetch_all(df_cm, has_cursor_cm, limit, max_retries=0, wait_seconds=0)


def cm_looper(df_cm, has_cursor_cm, limit, max_retries=60, wait_seconds=60):
    """Like `cm_loop`, but retry a failed page fetch after a pause.

    The previous version restarted `cm_loop` from the first page's frame while
    the cursor in `cm_variables` had already advanced, so every page fetched
    before a failure was missing from the saved pickle. It also recursed
    without bound and retried a failing save forever.

    Parameters
    ----------
    df_cm, has_cursor_cm, limit
        As for `cm_loop`.
    max_retries : int
        Consecutive failed fetches tolerated before the error is re-raised.
        With the defaults that is about an hour of waiting.
    wait_seconds : int or float
        Pause between retries.
    """
    print('try_loop')
    _cm_fetch_all(df_cm, has_cursor_cm, limit, max_retries, wait_seconds)

In [None]:
%%time

for i in range(239, 1000):
    print('Number:', i)
    try:
        os.mkdir('../../../Files/more_repos2/cm/' + df['owner'][i] + '-' + df['name'][i])
        print('Made Folder: ', df['owner'][i] + '-' + df['name'][i])
    except:
        print('Folder exists')
        
    # Set owner and name for cm_variables by changing the index
    cm_variables['owner'] = df['owner'][i]
    cm_variables['name'] = df['name'][i]

    ##### Comment Out this section if there's a error #####
    # Run first query
    try:
        df_cm, has_cursor_cm, limit = cm_result_query(cm_query.format(**cm_variables))

        print('Starting limit is: ', limit)
        print('Start cursor: ', has_cursor_cm, '\n')
        
    except TypeError:
        print('Does not exist: ', df['owner'][i] + '-' + df['name'][i], '\n')
        continue

    ##### Comment Out this section if there's a error #####

    cm_looper(df_cm, has_cursor_cm, limit)

Number: 239
Made Folder:  builtwithluv-ZenFocus
Starting limit is:  4908
Start cursor:  True 

try_loop
Next limit is:  4907
Next cursor:  True
Shape: (150, 6)
Next limit is:  4906
Next cursor:  True
Shape: (200, 6)
Next limit is:  4905
Next cursor:  True
Shape: (250, 6)
Next limit is:  4904
Next cursor:  True
Shape: (300, 6)
Next limit is:  4903
Next cursor:  True
Shape: (350, 6)
Next limit is:  4902
Next cursor:  True
Shape: (400, 6)
Next limit is:  4901
Next cursor:  True
Shape: (450, 6)
Next limit is:  4900
Next cursor:  True
Shape: (500, 6)
Next limit is:  4899
Next cursor:  False
Shape: (545, 6)
Number: 240
Made Folder:  hooleyhoop-HooleyBits
Starting limit is:  4898
Start cursor:  True 

try_loop
Next limit is:  4897
Next cursor:  True
Shape: (150, 6)
Next limit is:  4896
Next cursor:  True
Shape: (200, 6)
Next limit is:  4895
Next cursor:  True
Shape: (250, 6)
Next limit is:  4894
Next cursor:  True
Shape: (300, 6)
Next limit is:  4893
Next cursor:  False
Shape: (326, 6)
Number

In [55]:
# Revisit 238
# Show the current value of the loop index `i`.
print(i)

238


## Old Commits Query

In [1523]:
# %%time

# # Set index
# cm_variables['owner'] = df['owner'][i]
# cm_variables['name'] = df['name'][i]

# ##### Comment Out this section if there's an error #####

# # Run first query
# df_commits, has_cursor_cm, limit = cm_result_query(cm_query.format(**cm_variables))

# print('Starting limit is: ', limit)
# print('Start cursor: ', has_cursor_cm, '\n')

# ##### Comment Out this section if there's an error #####

# # Run While Loop below
# while has_cursor_cm and (limit >= 0):
    
#     # Run next queries
#     df_commits_2, has_cursor_cm, limit = cm_result_query(cm_query_2.format(**cm_variables))

#     # Concat to existing df_star
#     df_commits = pd.concat([df_commits, df_commits_2])
    
#     # Print limit and cursor
#     print('Next limit is: ', limit)
#     print('Next cursor: ', has_cursor_cm, '\n')

Starting limit is:  4552
Start cursor:  False 

Wall time: 1.62 s


In [1524]:
# Test if query exists
# run_query(cm_query.format(**cm_variables))

### Save Commits as a pickle file

In [1525]:
# df_commits.to_pickle('../../../Files/more_repos/' + cm_variables['owner'] + '-' + cm_variables['name'] + '/' + cm_variables['owner'] 
#                 + '_' + cm_variables['name'] + '_cm' + '.pk1')

---

## Run Stargazer Query

In [None]:
import os
import time

def _st_pickle_path():
    """Return the pickle path for the repository currently set in `st_variables`."""
    owner, name = st_variables['owner'], st_variables['name']
    # The file name previously took the owner from `cm_variables` (a copy-paste
    # slip), so stargazer pickles were named after whatever the commits run had set.
    return ('../../../Files/more_repos2/st/' + owner + '-' + name + '/'
            + owner + '_' + name + '_st' + '.pk1')


def _st_fetch_all(df_st, has_cursor_st, limit, max_retries, wait_seconds):
    """Fetch the remaining stargazer pages, then pickle the combined frame.

    A failed fetch is retried in place, so pages already collected are kept and
    the cursor stored in `st_variables` stays in step with `df_st`.
    """
    failures = 0

    # `limit >= 0` is kept from the original; the remaining budget is not
    # expected to go negative, so the cursor flag is what ends the loop.
    while has_cursor_st and (limit >= 0):
        try:
            page = st_result_query(st_query_2.format(**st_variables))
        except Exception as e:
            if failures >= max_retries:
                raise
            failures += 1
            print(e)
            print('except_loop')
            time.sleep(wait_seconds)
            continue

        failures = 0

        if page is None:
            # The query returned no repository: keep the pages fetched so far
            # rather than failing on the tuple unpack.
            print('No repository data returned; saving the pages fetched so far')
            break

        df_st_2, has_cursor_st, limit = page

        # Concat to existing df_st
        df_st = pd.concat([df_st, df_st_2])

        # Print limit and cursor
        print('Next limit is: ', limit)
        print('Next cursor: ', has_cursor_st)
        print('Shape:', df_st.shape)

    # Saving sits outside the retry: a bad path fails at once instead of being
    # retried forever.
    df_st.to_pickle(_st_pickle_path())


def st_loop(df_st, has_cursor_st, limit):
    """Fetch all remaining stargazer pages and pickle the result, without retrying.

    Parameters
    ----------
    df_st : pandas.DataFrame
        First page, as returned by `st_result_query`.
    has_cursor_st : bool
        Whether another page follows the first one.
    limit : int
        Remaining rate-limit budget reported with the first page.

    Raises
    ------
    Exception
        Whatever the page fetch or the pickle write raises.
    """
    _st_fetch_all(df_st, has_cursor_st, limit, max_retries=0, wait_seconds=0)


def st_looper(df_st, has_cursor_st, limit, max_retries=60, wait_seconds=60):
    """Like `st_loop`, but retry a failed page fetch after a pause.

    The previous version restarted `st_loop` from the first page's frame while
    the cursor in `st_variables` had already advanced, so every page fetched
    before a failure was missing from the saved pickle. It also recursed
    without bound and retried a failing save forever.

    Parameters
    ----------
    df_st, has_cursor_st, limit
        As for `st_loop`.
    max_retries : int
        Consecutive failed fetches tolerated before the error is re-raised.
        With the defaults that is about an hour of waiting.
    wait_seconds : int or float
        Pause between retries.
    """
    print('try_loop')
    _st_fetch_all(df_st, has_cursor_st, limit, max_retries, wait_seconds)

In [None]:
%%time

for i in range(200, 1000):
    print('Number:', i)
    try:
        os.mkdir('../../../Files/more_repos2/st/' + df['owner'][i] + '-' + df['name'][i])
        print('Made Folder: ', df['owner'][i] + '-' + df['name'][i])
    except:
        print('Folder exists')
        
    # Set owner and name for st_variables by changing the index
    st_variables['owner'] = df['owner'][i]
    st_variables['name'] = df['name'][i]

    ##### Comment Out this section if there's a error #####
    # Run first query
    try:
        df_st, has_cursor_st, limit = st_result_query(st_query.format(**st_variables))

        print('Starting limit is: ', limit)
        print('Start cursor: ', has_cursor_st, '\n')
        
    except TypeError:
        print('Does not exist: ', df['owner'][i] + '-' + df['name'][i], '\n')
        continue

    ##### Comment Out this section if there's a error #####

    st_looper(df_st, has_cursor_st, limit)

## Old Stargazer Query

In [1526]:
# %%time

# # Set index
# st_variables['owner'] = df['owner'][i]
# st_variables['name'] = df['name'][i]

# ##### Comment Out this section if there's an error #####

# # Run first query
# df_star, has_cursor_st, limit = st_result_query(st_query.format(**st_variables))

# print('Starting limit is: ', limit)
# print('Start cursor: ', has_cursor_st, '\n')

# ##### Comment Out this section if there's an error #####

# # Run While Loop below
# while has_cursor_st and (limit >= 0):
    
#     # Run next queries
#     df_star_2, has_cursor_st, limit = st_result_query(st_query_2.format(**st_variables))

#     # Concat to existing df_star
#     df_star = pd.concat([df_star, df_star_2])
    
#     # Print limit and cursor
#     print('Next limit is: ', limit)
#     print('Next cursor: ', has_cursor_st, '\n')

Starting limit is:  4551
Start cursor:  True 

Next limit is:  4550
Next cursor:  True 

Next limit is:  4549
Next cursor:  True 

Next limit is:  4548
Next cursor:  True 

Next limit is:  4547
Next cursor:  False 

Wall time: 2.66 s


### Save stargazers to pickle

In [1527]:
# df_star.to_pickle('../../../Files/more_repos/' + st_variables['owner'] + '-' + st_variables['name'] + '/' + st_variables['owner'] 
#                 + '_' + st_variables['name'] + '_st' + '.pk1')

---