# Web Scraping Code

This is the code that will be used for the various python scrapers. Each section will detail out its own scenario-specific code. I expect we'll have specific code for each type of scraper.

In [1]:
import requests
import json
from time import sleep
import random # to set sleep values

# Meetup

In [2]:
sources = ["https://www.meetup.com/ApolloMeteorNYC/events/", "https://www.hellomonday.com/", "https://www.meetup.com/ApolloMeteorNYC/members/"]
mgroups = ["Data-Science-Panel","ApolloMeteorNYC","sfomug","New-York-MongoDB-User-Group"]

all_members = []
group_listing = []

total_members = [1505,2866,3146,6575]
#total_members = [2,1,2,3]


PAGESIZE = 30

### Meetup member listings

A short description here.

In [3]:
def getOne(member_array):
    
    web_r = requests.get("https://www.meetup.com/mu_api/urlname/members?queries=%28endpoint%3Agroups%2FApolloMeteorNYC%2Fmembers%2Clist%3A%28dynamicRef%3Alist_groupMembers_apollometeornyc_all%2Cmerge%3A%28isReverse%3A%21f%29%29%2Cmeta%3A%28method%3Aget%29%2Cparams%3A%28filter%3Aall%2Cpage%3A1%2Csort%3Ajoined%29%2Cref%3AgroupMembers_apollometeornyc_all%29")
    outer_list = web_r.json().get("responses")
    return outer_list

all_members.append(getOne(all_members))

In [4]:
len(all_members)
all_members[0]

[{'ref': 'groupMembers_apollometeornyc_all',
  'value': {'value': [{'id': '200737999',
     'joined': '2020-04-09T23:38:37.000Z',
     'name': 'osmanatam',
     'role': '',
     'status': 'active',
     'intro': '',
     'title': '',
     'photo': {'id': '255891876',
      'highres_link': 'https://secure.meetupstatic.com/photos/member/c/a/a/4/highres_255891876.jpeg',
      'thumb_link': 'https://secure.meetupstatic.com/photos/member/c/a/a/4/thumb_255891876.jpeg'}},
    {'id': '281356625',
     'joined': '2020-04-09T23:29:58.000Z',
     'name': 'Peter D.',
     'role': '',
     'status': 'active',
     'intro': '',
     'title': '',
     'photo': {'id': '295420451',
      'highres_link': 'https://secure.meetupstatic.com/photos/member/9/e/0/3/highres_295420451.jpeg',
      'thumb_link': 'https://secure.meetupstatic.com/photos/member/9/e/0/3/thumb_295420451.jpeg'}},
    {'id': '1639448',
     'joined': '2020-04-08T19:48:13.000Z',
     'name': 'Crispina M.',
     'role': '',
     'status':

In [5]:
# requires a match pattern and a meetup name.

# page_grabs calculates the number of calls required to the XHR URL to get all members.

def page_grabs(total_members):
    total_pages = total_members//PAGESIZE
    if total_members%PAGESIZE > 0:
        total_pages = total_pages + 1
    print(f"Making {total_pages} fetches\n")
    return total_pages

def makeXhrUrl(meetup_group, page):
    source_url = "https://www.meetup.com/mu_api/urlname/members?queries=%28endpoint%3Agroups%2F{meetup_group}%2Fmembers%2Clist%3A%28dynamicRef%3Alist_groupMembers_{meetup_group_lc}_all%2Cmerge%3A%28isReverse%3A%21f%29%29%2Cmeta%3A%28method%3Aget%29%2Cparams%3A%28filter%3Aall%2Cpage%3A{page}%2Csort%3Ajoined%29%2Cref%3AgroupMembers_{meetup_group_lc}_all%29".format(meetup_group=meetup_group, page=page, meetup_group_lc=meetup_group.lower())
    #print("Assemble the URL and inject the appropriate parameters; start on the first page")
    return source_url
    

def download_listing(meetup_group, total_members):
    filename = "data/{group}.json".format(group=meetup_group)
    total_pages = page_grabs(total_members)
    try:
        with open("{fn}".format(fn=filename),"a") as outfile:
            for page in range(total_pages):
                web_req = requests.get(makeXhrUrl(meetup_group, page))
                all_members.append(web_req.json().get("responses"))

                json.dump(all_members, outfile)

                duration = random.randint(2, 13)
                sleep(duration)
                print(f"Slept {duration} after fetching {meetup_group} page {page} listing!")
    except:
        print("Encountered a problem writing while iterating.")
        
    print(f"Saved all_members data to {outfile.name}.")        
    return all_members
    

### Meetup classic profile page details

Make sure to scrape these pages with a logged-in meetup.com account, and you will get the full member names. So use Selenium.

In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup

In [6]:
# some code here
def grabThePage(classic_profile, pattern):
    pass

### Meetup new profile page details

Make sure to scrape these pages with a logged-in meetup.com account, and you will get the full member names. So use Selenium.

In [7]:
# some code here too

## Test drivers

The code below will be used to thest the functions written above. They also include test data 

In [8]:

for i, mgroup in enumerate(mgroups):
    print(i, mgroup, total_members[i], page_grabs(total_members[i]))

Making 51 fetches

0 Data-Science-Panel 1505 51
Making 96 fetches

1 ApolloMeteorNYC 2866 96
Making 105 fetches

2 sfomug 3146 105
Making 220 fetches

3 New-York-MongoDB-User-Group 6575 220


In [9]:
page_grabs(total_members[1])

Making 96 fetches



96

In [10]:
duration = random.randint(2, 13)
sleep(duration)
print(f"Slept {duration}!")

Slept 4!


In [11]:
# makeXhrUrl(mgroups[0],1)
all_members = []

for i, mgroup in enumerate(mgroups):
    download_listing(mgroup,total_members[i])

Making 51 fetches

Saved all_members data to data/Data-Science-Panel.json.
Slept 13 after fetching Data-Science-Panel page 0 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 12 after fetching Data-Science-Panel page 1 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 7 after fetching Data-Science-Panel page 2 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 6 after fetching Data-Science-Panel page 3 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 9 after fetching Data-Science-Panel page 4 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 13 after fetching Data-Science-Panel page 5 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 11 after fetching Data-Science-Panel page 6 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 6 after fetching Data-Science-Panel page 7 listing!
Saved all_members data to data/Data-Science-Panel.json.
Slept 8 a

Saved all_members data to data/ApolloMeteorNYC.json.
Slept 11 after fetching ApolloMeteorNYC page 21 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 12 after fetching ApolloMeteorNYC page 22 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 11 after fetching ApolloMeteorNYC page 23 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 5 after fetching ApolloMeteorNYC page 24 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 3 after fetching ApolloMeteorNYC page 25 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 8 after fetching ApolloMeteorNYC page 26 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 4 after fetching ApolloMeteorNYC page 27 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 8 after fetching ApolloMeteorNYC page 28 listing!
Saved all_members data to data/ApolloMeteorNYC.json.
Slept 12 after fetching ApolloMeteorNYC page 29 listing!
Saved all_membe

Saved all_members data to data/sfomug.json.
Slept 3 after fetching sfomug page 0 listing!
Saved all_members data to data/sfomug.json.
Slept 10 after fetching sfomug page 1 listing!
Saved all_members data to data/sfomug.json.
Slept 7 after fetching sfomug page 2 listing!
Saved all_members data to data/sfomug.json.
Slept 9 after fetching sfomug page 3 listing!
Saved all_members data to data/sfomug.json.
Slept 11 after fetching sfomug page 4 listing!
Saved all_members data to data/sfomug.json.
Slept 4 after fetching sfomug page 5 listing!
Saved all_members data to data/sfomug.json.
Slept 10 after fetching sfomug page 6 listing!
Saved all_members data to data/sfomug.json.
Slept 10 after fetching sfomug page 7 listing!
Saved all_members data to data/sfomug.json.
Slept 12 after fetching sfomug page 8 listing!
Saved all_members data to data/sfomug.json.
Slept 10 after fetching sfomug page 9 listing!
Saved all_members data to data/sfomug.json.
Slept 9 after fetching sfomug page 10 listing!
Sav

Saved all_members data to data/sfomug.json.
Slept 7 after fetching sfomug page 90 listing!
Saved all_members data to data/sfomug.json.
Slept 5 after fetching sfomug page 91 listing!
Saved all_members data to data/sfomug.json.
Slept 11 after fetching sfomug page 92 listing!
Saved all_members data to data/sfomug.json.
Slept 10 after fetching sfomug page 93 listing!
Saved all_members data to data/sfomug.json.
Slept 3 after fetching sfomug page 94 listing!
Saved all_members data to data/sfomug.json.
Slept 12 after fetching sfomug page 95 listing!
Saved all_members data to data/sfomug.json.
Slept 11 after fetching sfomug page 96 listing!
Saved all_members data to data/sfomug.json.
Slept 7 after fetching sfomug page 97 listing!
Saved all_members data to data/sfomug.json.
Slept 9 after fetching sfomug page 98 listing!
Saved all_members data to data/sfomug.json.
Slept 10 after fetching sfomug page 99 listing!
Saved all_members data to data/sfomug.json.
Slept 7 after fetching sfomug page 100 li

Slept 8 after fetching New-York-MongoDB-User-Group page 51 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 8 after fetching New-York-MongoDB-User-Group page 52 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 13 after fetching New-York-MongoDB-User-Group page 53 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 12 after fetching New-York-MongoDB-User-Group page 54 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 12 after fetching New-York-MongoDB-User-Group page 55 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 8 after fetching New-York-MongoDB-User-Group page 56 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 10 after fetching New-York-MongoDB-User-Group page 57 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 10 after fetching New-York-MongoDB-User-Group page 58 listi

Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 8 after fetching New-York-MongoDB-User-Group page 113 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 12 after fetching New-York-MongoDB-User-Group page 114 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 9 after fetching New-York-MongoDB-User-Group page 115 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 11 after fetching New-York-MongoDB-User-Group page 116 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 2 after fetching New-York-MongoDB-User-Group page 117 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 2 after fetching New-York-MongoDB-User-Group page 118 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 2 after fetching New-York-MongoDB-User-Group page 119 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.j

Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 5 after fetching New-York-MongoDB-User-Group page 174 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 3 after fetching New-York-MongoDB-User-Group page 175 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 6 after fetching New-York-MongoDB-User-Group page 176 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 11 after fetching New-York-MongoDB-User-Group page 177 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 9 after fetching New-York-MongoDB-User-Group page 178 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 6 after fetching New-York-MongoDB-User-Group page 179 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.json.
Slept 6 after fetching New-York-MongoDB-User-Group page 180 listing!
Saved all_members data to data/New-York-MongoDB-User-Group.js

In [12]:
len(all_members)

472

In [13]:
with open("data/raw_member_list.json","a") as outfile:
    json.dump(all_members, outfile)
    print(f"Saved all_members data to {outfile.name}.")

Saved all_members data to data/raw_member_list.json.


In [16]:
for i, mgroup in enumerate(mgroups):
    filename = "data/{group}.array.json".format(group=mgroup)
    try:
        with open("{fn}".format(fn=filename),"a") as outfile:
            for page in range(total_pages):
                json.dump(all_members[page][0].get('value').get('value'), outfile)
                print(f"Saved {mgroup} member data to {outfile.name}. Parse this file to get what you need")        

                duration = random.randint(2, 13)
                sleep(duration)
                print(f"Slept {duration} after fetching {meetup_group} page {page} listing!")
    except:
        print("Encountered a problem writing while iterating.")


ttx = all_members[][0].get('value').get('value')

ttx
#ttx.get('value')

SyntaxError: invalid syntax (<ipython-input-16-d37a30ccda55>, line 16)