### Using the Requests library to download web pages

In [1]:
import requests

Scraping has two parts: first download the web page, then parse it.

In [2]:
# get the web page
topics_url = 'https://github.com/topics'

In [3]:
response = requests.get(topics_url) # creates a response object

In [5]:
response.status_code # check whether the request was successful (200 means OK)

200

response.text  # all the contents of page 

In [6]:
len(response.text)

196178

In [14]:
page_contents = response.text

In [15]:
page_contents[:1000]

'\n\n<!DOCTYPE html>\n<html\n  lang="en"\n  \n  data-color-mode="auto" data-light-theme="light" data-dark-theme="dark"\n  data-a11y-animated-images="system" data-a11y-link-underlines="true"\n  >\n\n\n\n  <head>\n    <meta charset="utf-8">\n  <link rel="dns-prefetch" href="https://github.githubassets.com">\n  <link rel="dns-prefetch" href="https://avatars.githubusercontent.com">\n  <link rel="dns-prefetch" href="https://github-cloud.s3.amazonaws.com">\n  <link rel="dns-prefetch" href="https://user-images.githubusercontent.com/">\n  <link rel="preconnect" href="https://github.githubassets.com" crossorigin>\n  <link rel="preconnect" href="https://avatars.githubusercontent.com">\n\n  \n\n  <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/light-efd2f2257c96.css" /><link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/dark-6b1e37da2254.css" /><link data-color-theme="dark_dimmed" crossorig

Saving HTML

In [16]:
with open('webpage.html', 'w', encoding='utf-8') as f:
    f.write(page_contents)

Next, we need to extract the relevant information from this HTML file.

### Using Beautiful Soup to parse and extract information

In [18]:
from bs4 import BeautifulSoup  # import the BeautifulSoup class from the bs4 module

In [19]:
doc = BeautifulSoup(page_contents,'html.parser')

In [20]:
type(doc)

bs4.BeautifulSoup

Now that we have a BeautifulSoup object, we can find elements inside the web page by querying it.

In [21]:
p_tags = doc.find_all('p')

In [22]:
len(p_tags)

69

In [23]:
p_tags[:5]

[<p>We read every piece of feedback, and take your input very seriously.</p>,
 <p class="text-small color-fg-muted">
             To see all available qualifiers, see our <a class="Link--inTextBlock" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax">documentation</a>.
           </p>,
 <p class="f4 color-fg-muted col-md-6 mx-auto">Browse popular topics on GitHub.</p>,
 <p class="f3 lh-condensed text-center Link--primary mb-0 mt-1">
         Swift
       </p>,
 <p class="f5 color-fg-muted text-center mb-0 mt-1">Swift is a modern programming language focused on safety, performance, and expressivity.</p>]

In [41]:
selection_class = 'f3 lh-condensed mb-0 mt-1 Link--primary'

topic_title_tags = doc.find_all('p',{'class': selection_class})

In [42]:
len(topic_title_tags)

30

In [43]:
topic_title_tags[:5]

[<p class="f3 lh-condensed mb-0 mt-1 Link--primary">3D</p>,
 <p class="f3 lh-condensed mb-0 mt-1 Link--primary">Ajax</p>,
 <p class="f3 lh-condensed mb-0 mt-1 Link--primary">Algorithm</p>,
 <p class="f3 lh-condensed mb-0 mt-1 Link--primary">Amp</p>,
 <p class="f3 lh-condensed mb-0 mt-1 Link--primary">Android</p>]

In [44]:
desc_selector = 'f5 color-fg-muted mb-0 mt-1'
# find_all is the supported spelling (findAll is a deprecated alias) and
# matches the other queries in this notebook
topic_desc_tags = doc.find_all('p', {'class': desc_selector})

In [45]:
topic_desc_tags[:5]

[<p class="f5 color-fg-muted mb-0 mt-1">
           3D refers to the use of three-dimensional graphics, modeling, and animation in various industries.
         </p>,
 <p class="f5 color-fg-muted mb-0 mt-1">
           Ajax is a technique for creating interactive web applications.
         </p>,
 <p class="f5 color-fg-muted mb-0 mt-1">
           Algorithms are self-contained sequences that carry out a variety of tasks.
         </p>,
 <p class="f5 color-fg-muted mb-0 mt-1">
           Amp is a non-blocking concurrency library for PHP.
         </p>,
 <p class="f5 color-fg-muted mb-0 mt-1">
           Android is an operating system built by Google designed for mobile devices.
         </p>]

In [48]:
topic_link_tags = doc.find_all('a',{'class': 'no-underline flex-1 d-flex flex-column'})

In [49]:
len(topic_link_tags)

30

In [52]:
topic_link_tags[0]['href']

'/topics/3d'

In [53]:
topic0_url = "https://github.com" + topic_link_tags[0]['href']
print(topic0_url)

https://github.com/topics/3d


In [57]:
# Pull the visible text out of every topic-title <p> tag
topic_titles = [tag.text for tag in topic_title_tags]

In [58]:
print(topic_titles)

['3D', 'Ajax', 'Algorithm', 'Amp', 'Android', 'Angular', 'Ansible', 'API', 'Arduino', 'ASP.NET', 'Awesome Lists', 'Amazon Web Services', 'Azure', 'Babel', 'Bash', 'Bitcoin', 'Bootstrap', 'Bot', 'C', 'Chrome', 'Chrome extension', 'Command line interface', 'Clojure', 'Code quality', 'Code review', 'Compiler', 'Continuous integration', 'C++', 'Cryptocurrency', 'Crystal']


In [61]:
# The description text is padded with newlines/indentation in the HTML, so strip it
topic_descs = [tag.text.strip() for tag in topic_desc_tags]

In [63]:
print(topic_descs[:5])

['3D refers to the use of three-dimensional graphics, modeling, and animation in various industries.', 'Ajax is a technique for creating interactive web applications.', 'Algorithms are self-contained sequences that carry out a variety of tasks.', 'Amp is a non-blocking concurrency library for PHP.', 'Android is an operating system built by Google designed for mobile devices.']


In [64]:
# The hrefs on the page are site-relative, so prefix each with the GitHub root
base_url = 'https://github.com'
topic_urls = [base_url + tag['href'] for tag in topic_link_tags]

In [65]:
topic_urls

['https://github.com/topics/3d',
 'https://github.com/topics/ajax',
 'https://github.com/topics/algorithm',
 'https://github.com/topics/amphp',
 'https://github.com/topics/android',
 'https://github.com/topics/angular',
 'https://github.com/topics/ansible',
 'https://github.com/topics/api',
 'https://github.com/topics/arduino',
 'https://github.com/topics/aspnet',
 'https://github.com/topics/awesome',
 'https://github.com/topics/aws',
 'https://github.com/topics/azure',
 'https://github.com/topics/babel',
 'https://github.com/topics/bash',
 'https://github.com/topics/bitcoin',
 'https://github.com/topics/bootstrap',
 'https://github.com/topics/bot',
 'https://github.com/topics/c',
 'https://github.com/topics/chrome',
 'https://github.com/topics/chrome-extension',
 'https://github.com/topics/cli',
 'https://github.com/topics/clojure',
 'https://github.com/topics/code-quality',
 'https://github.com/topics/code-review',
 'https://github.com/topics/compiler',
 'https://github.com/topics/co

To create a CSV file, we can use a pandas DataFrame.

In [66]:
import pandas as pd

In [67]:
topics_dict = {
    'title': topic_titles,
    'description':topic_descs,
    'url': topic_urls
}

In [68]:
topics_df = pd.DataFrame(topics_dict)

In [69]:
topics_df

Unnamed: 0,title,description,url
0,3D,3D refers to the use of three-dimensional grap...,https://github.com/topics/3d
1,Ajax,Ajax is a technique for creating interactive w...,https://github.com/topics/ajax
2,Algorithm,Algorithms are self-contained sequences that c...,https://github.com/topics/algorithm
3,Amp,Amp is a non-blocking concurrency library for ...,https://github.com/topics/amphp
4,Android,Android is an operating system built by Google...,https://github.com/topics/android
5,Angular,Angular is an open source web application plat...,https://github.com/topics/angular
6,Ansible,Ansible is a simple and powerful automation en...,https://github.com/topics/ansible
7,API,An API (Application Programming Interface) is ...,https://github.com/topics/api
8,Arduino,Arduino is an open source platform for buildin...,https://github.com/topics/arduino
9,ASP.NET,ASP.NET is a web framework for building modern...,https://github.com/topics/aspnet


### Creating a CSV file with the extracted information

In [71]:
topics_df.to_csv('topics.csv',index=None)

### Getting information out of a topic page 

In [72]:
topic_page_url = topic_urls[0]

In [73]:
topic_page_url

'https://github.com/topics/3d'

In [74]:
response = requests.get(topic_page_url)

In [75]:
response.status_code

200

In [76]:
len(response.text)

515719

In [77]:
topic_doc = BeautifulSoup(response.text,'html.parser')

In [80]:
h3_selection_class = 'f3 color-fg-muted text-normal lh-condensed'

repo_tags = topic_doc.find_all('h3',{'class':h3_selection_class})

In [81]:
len(repo_tags)

20

In [87]:
# Collect the links inside the first repository heading.
# find_all is the supported spelling; findAll is a deprecated alias.
a_tags = repo_tags[0].find_all('a')

In [88]:
a_tags

[<a class="Link" data-hydro-click='{"event_type":"explore.click","payload":{"click_context":"REPOSITORY_CARD","click_target":"OWNER","click_visual_representation":"REPOSITORY_OWNER_HEADING","actor_id":null,"record_id":97088,"originating_url":"https://github.com/topics/3d","user_id":null}}' data-hydro-click-hmac="c72fbd5c69a8ee7c9c53a4e65de2b93c8fc7552dd793945819639bc165c0f0ba" data-turbo="false" data-view-component="true" href="/mrdoob">
             mrdoob
 </a>,
 <a class="Link text-bold wb-break-word" data-hydro-click='{"event_type":"explore.click","payload":{"click_context":"REPOSITORY_CARD","click_target":"REPOSITORY","click_visual_representation":"REPOSITORY_NAME_HEADING","actor_id":null,"record_id":576201,"originating_url":"https://github.com/topics/3d","user_id":null}}' data-hydro-click-hmac="4a2667db3d63a1739c412e059e5da95afe419df83f70949b5d59dc3478f5c79a" data-turbo="false" data-view-component="true" href="/mrdoob/three.js">
             three.js
 </a>]

In [89]:
a_tags[0]

<a class="Link" data-hydro-click='{"event_type":"explore.click","payload":{"click_context":"REPOSITORY_CARD","click_target":"OWNER","click_visual_representation":"REPOSITORY_OWNER_HEADING","actor_id":null,"record_id":97088,"originating_url":"https://github.com/topics/3d","user_id":null}}' data-hydro-click-hmac="c72fbd5c69a8ee7c9c53a4e65de2b93c8fc7552dd793945819639bc165c0f0ba" data-turbo="false" data-view-component="true" href="/mrdoob">
            mrdoob
</a>

In [91]:
a_tags[0].text.strip()

'mrdoob'

In [92]:
a_tags[1]

<a class="Link text-bold wb-break-word" data-hydro-click='{"event_type":"explore.click","payload":{"click_context":"REPOSITORY_CARD","click_target":"REPOSITORY","click_visual_representation":"REPOSITORY_NAME_HEADING","actor_id":null,"record_id":576201,"originating_url":"https://github.com/topics/3d","user_id":null}}' data-hydro-click-hmac="4a2667db3d63a1739c412e059e5da95afe419df83f70949b5d59dc3478f5c79a" data-turbo="false" data-view-component="true" href="/mrdoob/three.js">
            three.js
</a>

In [93]:
a_tags[1].text.strip()

'three.js'

In [94]:
a_tags[1]['href']

'/mrdoob/three.js'

In [95]:
base_url = 'https://github.com'
repo_url = base_url + a_tags[1]['href']

print(repo_url)

https://github.com/mrdoob/three.js


In [96]:
star_tag = topic_doc.find_all('span',{'id': 'repo-stars-counter-star'})

In [97]:
len(star_tag)

20

In [111]:
star_tag[0]['title']

'100,362'

In [114]:
def get_repo_info(h3_tag, star_tag, base_url='https://github.com'):
    """Extract the owner, name, star count and URL of one repository card.

    Args:
        h3_tag: The ``<h3>`` heading of a repository card. Its first ``<a>``
            is read as the owner link and its second as the repository link.
        star_tag: The star-counter ``<span>`` of the same card; the count is
            read from its ``title`` attribute.
        base_url: Site root prepended to the repository's relative ``href``.
            It is a defaulted parameter so the function does not depend on a
            ``base_url`` global having been defined by an earlier cell.

    Returns:
        A tuple ``(username, repo_name, stars, repo_url)``. ``stars`` is the
        raw attribute string (e.g. ``'100,362'``), not a number.
    """
    # find_all is the supported spelling; findAll is a deprecated alias
    a_tags = h3_tag.find_all('a')

    # Link text is padded with whitespace in the page markup
    username = a_tags[0].text.strip()
    repo_name = a_tags[1].text.strip()
    repo_url = base_url + a_tags[1]['href']
    stars = star_tag['title']

    return username, repo_name, stars, repo_url

In [102]:
repo_tags

[<h3 class="f3 color-fg-muted text-normal lh-condensed">
 <a class="Link" data-hydro-click='{"event_type":"explore.click","payload":{"click_context":"REPOSITORY_CARD","click_target":"OWNER","click_visual_representation":"REPOSITORY_OWNER_HEADING","actor_id":null,"record_id":97088,"originating_url":"https://github.com/topics/3d","user_id":null}}' data-hydro-click-hmac="c72fbd5c69a8ee7c9c53a4e65de2b93c8fc7552dd793945819639bc165c0f0ba" data-turbo="false" data-view-component="true" href="/mrdoob">
             mrdoob
 </a>          /
           <a class="Link text-bold wb-break-word" data-hydro-click='{"event_type":"explore.click","payload":{"click_context":"REPOSITORY_CARD","click_target":"REPOSITORY","click_visual_representation":"REPOSITORY_NAME_HEADING","actor_id":null,"record_id":576201,"originating_url":"https://github.com/topics/3d","user_id":null}}' data-hydro-click-hmac="4a2667db3d63a1739c412e059e5da95afe419df83f70949b5d59dc3478f5c79a" data-turbo="false" data-view-component="true"

In [103]:
star_tag

[<span aria-label="100362 users starred this repository" class="Counter js-social-count" data-plural-suffix="users starred this repository" data-singular-suffix="user starred this repository" data-turbo-replace="true" data-view-component="true" id="repo-stars-counter-star" title="100,362">100k</span>,
 <span aria-label="26612 users starred this repository" class="Counter js-social-count" data-plural-suffix="users starred this repository" data-singular-suffix="user starred this repository" data-turbo-replace="true" data-view-component="true" id="repo-stars-counter-star" title="26,612">26.6k</span>,
 <span aria-label="22943 users starred this repository" class="Counter js-social-count" data-plural-suffix="users starred this repository" data-singular-suffix="user starred this repository" data-turbo-replace="true" data-view-component="true" id="repo-stars-counter-star" title="22,943">22.9k</span>,
 <span aria-label="22760 users starred this repository" class="Counter js-social-count" data-

In [115]:
get_repo_info(repo_tags[0],star_tag[0])

('mrdoob', 'three.js', '100,362', 'https://github.com/mrdoob/three.js')

In [117]:
# One list per output column; position i in every list describes the same repository
topic_repos_dict = {'username': [], 'repo_name': [], 'stars': [], 'repo_url': []}

for i, h3_tag in enumerate(repo_tags):
    repo_info = get_repo_info(h3_tag, star_tag[i])

    # The dict keys were inserted in the same order as the tuple get_repo_info returns
    for key, value in zip(topic_repos_dict, repo_info):
        topic_repos_dict[key].append(value)

In [118]:
topic_repos_dict

{'username': ['mrdoob',
  'pmndrs',
  'libgdx',
  'BabylonJS',
  'ssloy',
  'FreeCAD',
  'lettier',
  'aframevr',
  'CesiumGS',
  'blender',
  'MonoGame',
  'mapbox',
  'isl-org',
  'metafizzy',
  'timzhang642',
  'nerfstudio-project',
  'a1studmuffin',
  '4ian',
  'FyroxEngine',
  'domlysz'],
 'repo_name': ['three.js',
  'react-three-fiber',
  'libgdx',
  'Babylon.js',
  'tinyrenderer',
  'FreeCAD',
  '3d-game-shaders-for-beginners',
  'aframe',
  'cesium',
  'blender',
  'MonoGame',
  'mapbox-gl-js',
  'Open3D',
  'zdog',
  '3D-Machine-Learning',
  'nerfstudio',
  'SpaceshipGenerator',
  'GDevelop',
  'Fyrox',
  'BlenderGIS'],
 'stars': ['100,362',
  '26,612',
  '22,943',
  '22,760',
  '19,812',
  '18,244',
  '17,370',
  '16,417',
  '12,371',
  '12,148',
  '11,089',
  '10,886',
  '10,806',
  '10,289',
  '9,529',
  '8,893',
  '7,614',
  '7,506',
  '7,485',
  '7,434'],
 'repo_url': ['https://github.com/mrdoob/three.js',
  'https://github.com/pmndrs/react-three-fiber',
  'https://github

In [119]:
topic_repos_df = pd.DataFrame(topic_repos_dict)

In [120]:
topic_repos_df

Unnamed: 0,username,repo_name,stars,repo_url
0,mrdoob,three.js,100362,https://github.com/mrdoob/three.js
1,pmndrs,react-three-fiber,26612,https://github.com/pmndrs/react-three-fiber
2,libgdx,libgdx,22943,https://github.com/libgdx/libgdx
3,BabylonJS,Babylon.js,22760,https://github.com/BabylonJS/Babylon.js
4,ssloy,tinyrenderer,19812,https://github.com/ssloy/tinyrenderer
5,FreeCAD,FreeCAD,18244,https://github.com/FreeCAD/FreeCAD
6,lettier,3d-game-shaders-for-beginners,17370,https://github.com/lettier/3d-game-shaders-for...
7,aframevr,aframe,16417,https://github.com/aframevr/aframe
8,CesiumGS,cesium,12371,https://github.com/CesiumGS/cesium
9,blender,blender,12148,https://github.com/blender/blender


So far we have done this for only one topic (3D); we want to do the same for the other topics as well.

In [139]:
def get_topic_page(topic_url, timeout=30):
    """Download a page and return it parsed as a BeautifulSoup document.

    Args:
        topic_url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend.

    Raises:
        Exception: If the server answers with any status code other than 200.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(topic_url, timeout=timeout)

    # Anything other than 200 means we did not receive the page we asked for
    if response.status_code != 200:
        raise Exception('Failed to Load Page {}'.format(topic_url))

    return BeautifulSoup(response.text, 'html.parser')

def get_repo_info(h3_tag, star_tag, base_url='https://github.com'):
    """Extract the owner, name, star count and URL of one repository card.

    Args:
        h3_tag: The ``<h3>`` heading of a repository card. Its first ``<a>``
            is read as the owner link and its second as the repository link.
        star_tag: The star-counter ``<span>`` of the same card; the count is
            read from its ``title`` attribute.
        base_url: Site root prepended to the repository's relative ``href``.
            It is a defaulted parameter so the function does not depend on a
            ``base_url`` global having been defined by an earlier cell.

    Returns:
        A tuple ``(username, repo_name, stars, repo_url)``. ``stars`` is the
        raw attribute string (e.g. ``'100,362'``), not a number.
    """
    # find_all is the supported spelling; findAll is a deprecated alias
    a_tags = h3_tag.find_all('a')

    # Link text is padded with whitespace in the page markup
    username = a_tags[0].text.strip()
    repo_name = a_tags[1].text.strip()
    repo_url = base_url + a_tags[1]['href']
    stars = star_tag['title']

    return username, repo_name, stars, repo_url

def get_topic_repos(topic_doc):
    """Build a DataFrame of the repositories listed on a parsed topic page.

    Args:
        topic_doc: Parsed topic page; it is queried with ``find_all``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``username``, ``repo_name``,
        ``stars`` and ``repo_url``, one row per repository heading found.
    """
    # Repository headings carry the owner link and the repository link
    headings = topic_doc.find_all(
        'h3', {'class': 'f3 color-fg-muted text-normal lh-condensed'})

    # Star counters are matched to headings purely by position on the page
    star_counters = topic_doc.find_all('span', {'id': 'repo-stars-counter-star'})

    column_names = ('username', 'repo_name', 'stars', 'repo_url')
    collected = {column: [] for column in column_names}

    for position, heading in enumerate(headings):
        details = get_repo_info(heading, star_counters[position])

        # get_repo_info returns its values in the same order as column_names
        for column, value in zip(column_names, details):
            collected[column].append(value)

    return pd.DataFrame(collected)

def scrape_topic(topic_url, topic_name):
    """Scrape one topic page and save its repositories as ``<topic_name>.csv``.

    Args:
        topic_url: URL of the topic page to download.
        topic_name: Used verbatim as the CSV file name (``.csv`` is appended).
    """
    topic_doc = get_topic_page(topic_url)
    csv_path = topic_name + '.csv'

    # index=None keeps the DataFrame's row index out of the file
    get_topic_repos(topic_doc).to_csv(csv_path, index=None)


In [123]:
url3 = topic_urls[3]

url3



'https://github.com/topics/amphp'

In [124]:
url3 = topic_urls[3]

topic3_doc = get_topic_page(url3)

topic3_repos = get_topic_repos(topic3_doc)

In [125]:
topic3_repos

Unnamed: 0,username,repo_name,stars,repo_url
0,amphp,amp,4175,https://github.com/amphp/amp
1,danog,MadelineProto,2733,https://github.com/danog/MadelineProto
2,unreal4u,telegram-api,784,https://github.com/unreal4u/telegram-api
3,amphp,parallel,760,https://github.com/amphp/parallel
4,amphp,http-client,700,https://github.com/amphp/http-client
5,amphp,byte-stream,362,https://github.com/amphp/byte-stream
6,amphp,mysql,352,https://github.com/amphp/mysql
7,php-service-bus,service-bus,350,https://github.com/php-service-bus/service-bus
8,amphp,parallel-functions,268,https://github.com/amphp/parallel-functions
9,amphp,process,227,https://github.com/amphp/process


In [131]:
topic_urls[2]

'https://github.com/topics/algorithm'

In [133]:
get_topic_repos(get_topic_page(topic_urls[2])).to_csv('algorithm.csv',index=None)

### Combining all the stuff

In [138]:
def get_topic_titles(doc):
    """Return the title of every topic listed on the parsed topics page.

    Args:
        doc: Parsed topics page; it is queried with ``find_all``.

    Returns:
        A list of title strings in page order. Surrounding whitespace is
        stripped, as ``get_topic_desc`` does, because the titles are later
        used as CSV file names.
    """
    selection_class = 'f3 lh-condensed mb-0 mt-1 Link--primary'
    topic_title_tags = doc.find_all('p', {'class': selection_class})

    return [tag.text.strip() for tag in topic_title_tags]

def get_topic_desc(doc):
    """Return the description of every topic listed on the parsed topics page.

    Args:
        doc: Parsed topics page; it is queried with ``find_all``.

    Returns:
        A list of description strings in page order, with the surrounding
        whitespace from the HTML stripped.
    """
    desc_selector = 'f5 color-fg-muted mb-0 mt-1'
    # find_all is the supported spelling; findAll is a deprecated alias
    topic_desc_tags = doc.find_all('p', {'class': desc_selector})

    return [tag.text.strip() for tag in topic_desc_tags]

def get_topic_url(doc):
    """Return the absolute URL of every topic linked from the parsed topics page.

    Args:
        doc: Parsed topics page; it is queried with ``find_all``.

    Returns:
        A list of URLs in page order, each built by prefixing the link's
        site-relative ``href`` with ``https://github.com``.
    """
    link_class = 'no-underline flex-1 d-flex flex-column'
    site_root = 'https://github.com'

    anchors = doc.find_all('a', {'class': link_class})
    return [site_root + anchor['href'] for anchor in anchors]

def scrape_topics():
    """Scrape the GitHub topics index into a DataFrame.

    Download, status check and HTML parsing are delegated to
    ``get_topic_page`` so the notebook has a single fetch path instead of two
    copies of the same request/validate/parse code.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``description`` and
        ``URL``, one row per topic found on the page.

    Raises:
        Exception: Propagated from ``get_topic_page`` when the page does not
            load with status 200.
    """
    topic_url = 'https://github.com/topics'
    doc = get_topic_page(topic_url)

    topics_dict = {
        'title': get_topic_titles(doc),
        'description': get_topic_desc(doc),
        # Upper-case key: scrape_topics_repos reads row['URL']
        'URL': get_topic_url(doc),
    }

    return pd.DataFrame(topics_dict)

In [137]:
scrape_topics()

Unnamed: 0,title,description,URL
0,3D,3D refers to the use of three-dimensional grap...,https://github.com/topics/3d
1,Ajax,Ajax is a technique for creating interactive w...,https://github.com/topics/ajax
2,Algorithm,Algorithms are self-contained sequences that c...,https://github.com/topics/algorithm
3,Amp,Amp is a non-blocking concurrency library for ...,https://github.com/topics/amphp
4,Android,Android is an operating system built by Google...,https://github.com/topics/android
5,Angular,Angular is an open source web application plat...,https://github.com/topics/angular
6,Ansible,Ansible is a simple and powerful automation en...,https://github.com/topics/ansible
7,API,An API (Application Programming Interface) is ...,https://github.com/topics/api
8,Arduino,Arduino is an open source platform for buildin...,https://github.com/topics/arduino
9,ASP.NET,ASP.NET is a web framework for building modern...,https://github.com/topics/aspnet


In [142]:
import os

In [140]:
def scrape_topics_repos():
    """Scrape the topics index, then write one CSV of repositories per topic.

    Progress is printed before the index is fetched and before each topic.
    """
    print('Scraping List of Topics')
    topics_df = scrape_topics()

    # Walk the title and URL columns in lockstep; the row index is not needed
    for title, url in zip(topics_df['title'], topics_df['URL']):
        print('Scraping top repositories for "{}"'.format(title))
        scrape_topic(url, title)

In [141]:
scrape_topics_repos()

Scraping List of Topics
Scraping top repositories for "3D"
Scraping top repositories for "Ajax"
Scraping top repositories for "Algorithm"
Scraping top repositories for "Amp"
Scraping top repositories for "Android"
Scraping top repositories for "Angular"
Scraping top repositories for "Ansible"
Scraping top repositories for "API"
Scraping top repositories for "Arduino"
Scraping top repositories for "ASP.NET"
Scraping top repositories for "Awesome Lists"
Scraping top repositories for "Amazon Web Services"
Scraping top repositories for "Azure"
Scraping top repositories for "Babel"
Scraping top repositories for "Bash"
Scraping top repositories for "Bitcoin"
Scraping top repositories for "Bootstrap"
Scraping top repositories for "Bot"
Scraping top repositories for "C"
Scraping top repositories for "Chrome"
Scraping top repositories for "Chrome extension"
Scraping top repositories for "Command line interface"
Scraping top repositories for "Clojure"
Scraping top repositories for "Code quality"

## Final Code

In [None]:
def get_topic_titles(doc):
    """Return the title of every topic listed on the parsed topics page.

    Args:
        doc: Parsed topics page; it is queried with ``find_all``.

    Returns:
        A list of title strings in page order. Surrounding whitespace is
        stripped, as ``get_topic_desc`` does, because the titles are later
        used as CSV file names.
    """
    selection_class = 'f3 lh-condensed mb-0 mt-1 Link--primary'
    topic_title_tags = doc.find_all('p', {'class': selection_class})

    return [tag.text.strip() for tag in topic_title_tags]

def get_topic_desc(doc):
    """Return the description of every topic listed on the parsed topics page.

    Args:
        doc: Parsed topics page; it is queried with ``find_all``.

    Returns:
        A list of description strings in page order, with the surrounding
        whitespace from the HTML stripped.
    """
    desc_selector = 'f5 color-fg-muted mb-0 mt-1'
    # find_all is the supported spelling; findAll is a deprecated alias
    topic_desc_tags = doc.find_all('p', {'class': desc_selector})

    return [tag.text.strip() for tag in topic_desc_tags]

def get_topic_url(doc):
    """Return the absolute URL of every topic linked from the parsed topics page.

    Args:
        doc: Parsed topics page; it is queried with ``find_all``.

    Returns:
        A list of URLs in page order, each built by prefixing the link's
        site-relative ``href`` with ``https://github.com``.
    """
    link_class = 'no-underline flex-1 d-flex flex-column'
    site_root = 'https://github.com'

    anchors = doc.find_all('a', {'class': link_class})
    return [site_root + anchor['href'] for anchor in anchors]

def scrape_topics():
    """Scrape the GitHub topics index into a DataFrame.

    Download, status check and HTML parsing are delegated to
    ``get_topic_page`` so the notebook has a single fetch path instead of two
    copies of the same request/validate/parse code.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``description`` and
        ``URL``, one row per topic found on the page.

    Raises:
        Exception: Propagated from ``get_topic_page`` when the page does not
            load with status 200.
    """
    topic_url = 'https://github.com/topics'
    doc = get_topic_page(topic_url)

    topics_dict = {
        'title': get_topic_titles(doc),
        'description': get_topic_desc(doc),
        # Upper-case key: scrape_topics_repos reads row['URL']
        'URL': get_topic_url(doc),
    }

    return pd.DataFrame(topics_dict)

def get_topic_page(topic_url, timeout=30):
    """Download a page and return it parsed as a BeautifulSoup document.

    Args:
        topic_url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend.

    Raises:
        Exception: If the server answers with any status code other than 200.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(topic_url, timeout=timeout)

    # Anything other than 200 means we did not receive the page we asked for
    if response.status_code != 200:
        raise Exception('Failed to Load Page {}'.format(topic_url))

    return BeautifulSoup(response.text, 'html.parser')

def get_repo_info(h3_tag, star_tag, base_url='https://github.com'):
    """Extract the owner, name, star count and URL of one repository card.

    Args:
        h3_tag: The ``<h3>`` heading of a repository card. Its first ``<a>``
            is read as the owner link and its second as the repository link.
        star_tag: The star-counter ``<span>`` of the same card; the count is
            read from its ``title`` attribute.
        base_url: Site root prepended to the repository's relative ``href``.
            It is a defaulted parameter so this cell is self-contained and
            does not depend on a ``base_url`` global from an earlier cell.

    Returns:
        A tuple ``(username, repo_name, stars, repo_url)``. ``stars`` is the
        raw attribute string (e.g. ``'100,362'``), not a number.
    """
    # find_all is the supported spelling; findAll is a deprecated alias
    a_tags = h3_tag.find_all('a')

    # Link text is padded with whitespace in the page markup
    username = a_tags[0].text.strip()
    repo_name = a_tags[1].text.strip()
    repo_url = base_url + a_tags[1]['href']
    stars = star_tag['title']

    return username, repo_name, stars, repo_url

def get_topic_repos(topic_doc):
    """Build a DataFrame of the repositories listed on a parsed topic page.

    Args:
        topic_doc: Parsed topic page; it is queried with ``find_all``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``username``, ``repo_name``,
        ``stars`` and ``repo_url``, one row per repository heading found.
    """
    # Repository headings carry the owner link and the repository link
    headings = topic_doc.find_all(
        'h3', {'class': 'f3 color-fg-muted text-normal lh-condensed'})

    # Star counters are matched to headings purely by position on the page
    star_counters = topic_doc.find_all('span', {'id': 'repo-stars-counter-star'})

    column_names = ('username', 'repo_name', 'stars', 'repo_url')
    collected = {column: [] for column in column_names}

    for position, heading in enumerate(headings):
        details = get_repo_info(heading, star_counters[position])

        # get_repo_info returns its values in the same order as column_names
        for column, value in zip(column_names, details):
            collected[column].append(value)

    return pd.DataFrame(collected)

def scrape_topic(topic_url, topic_name):
    """Scrape one topic page and save its repositories as ``<topic_name>.csv``.

    Args:
        topic_url: URL of the topic page to download.
        topic_name: Used verbatim as the CSV file name (``.csv`` is appended).
    """
    topic_doc = get_topic_page(topic_url)
    csv_path = topic_name + '.csv'

    # index=None keeps the DataFrame's row index out of the file
    get_topic_repos(topic_doc).to_csv(csv_path, index=None)

def scrape_topics_repos():
    """Scrape the topics index, then write one CSV of repositories per topic.

    Progress is printed before the index is fetched and before each topic.
    """
    print('Scraping List of Topics')
    topics_df = scrape_topics()

    # Walk the title and URL columns in lockstep; the row index is not needed
    for title, url in zip(topics_df['title'], topics_df['URL']):
        print('Scraping top repositories for "{}"'.format(title))
        scrape_topic(url, title)