In [36]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [2]:
pyladies_request = requests.get('https://www.pyladies.com/locations')
pyladies_soup = BeautifulSoup(pyladies_request.text, 'html.parser')

In [7]:
all_chapters = pyladies_soup.find_all(class_="chapter_location")
print('There are {} PyLadies chapters listed on the PyLadies website'.format(len(all_chapters)))

There are 84 PyLadies chapters listed on the PyLadies website


## Determine which chapters have Meetup pages.

In [10]:
ix = 10
single_chapter = all_chapters[ix]
single_chapter

<div class="chapter_location" data-chapter-name="Berlin, Germany" data-meetup-id="9206152">
<div class="logo-container">
<img alt="Berlin, Germany" src="../assets/images/pyladies_berlin.png"/>
</div>
<h3 class="chpts chapter-name">
<a href="http://berlin.pyladies.com">
                            
                            Berlin, Germany</a>
</h3>
<h3 class="chpts social-icons">
<a class="social icon link" data-icon="🔗" href="http://berlin.pyladies.com" title="Website"></a>
<a class="social icon vcard" data-icon="" href="/cdn-cgi/l/email-protection#375552455b5e5977474e5b56535e52441954585a" title="Contact"></a>
<a class="social icon twitter" data-icon="" href="https://twitter.com/PyLadiesBer" title="Twitter"></a>
<a class="social icon location" data-icon="" href="http://www.meetup.com/PyLadies-Berlin/" title="Meetup Link"></a>
</h3>
</div>

In [21]:
chapter_name = single_chapter.find("h3", class_="chpts chapter-name").find("a").text.strip()
print('The chapter name is: {}'.format(chapter_name))

The chapter name is: Berlin, Germany


In [25]:
chapter_meetup = single_chapter.find_all(title="Meetup Link", href=True)
if len(chapter_meetup) == 1:
    meetup_url = chapter_meetup[0]['href']
    print('The chapter {} has a meetup link: {}'.format(chapter_name, meetup_url))
else:
    print('The chapter {} DOES NOT have a meetup link'.format(chapter_name))

The chapter Berlin, Germany has a meetup link: http://www.meetup.com/PyLadies-Berlin/


#### Create a DataFrame of chapters and Meetup URLs.

In [56]:
chapter_ids = []
chapter_names = []
meetup_urls = []

for chapter_id, single_chapter in enumerate(all_chapters):
    chapter_ids.append(chapter_id)
    chapter_names.append(single_chapter.find("h3", class_="chpts chapter-name").find("a").text.strip())
    chapter_meetup = single_chapter.find_all(title="Meetup Link", href=True)
    if len(chapter_meetup) > 0:
        #print(chapter_name)
        meetup_urls.append(chapter_meetup[0]['href'])
    else:
        #print(chapter_name)
        meetup_urls.append(np.nan)

In [57]:
df = pd.DataFrame(index=chapter_ids, data={'chapter_name':chapter_names, 'meetup_url': meetup_url})
df.head()

Unnamed: 0,chapter_name,meetup_url
0,"Amsterdam, the Netherlands",http://www.meetup.com/PyLadiesAMS/
1,"Antananarivo, MDG",
2,"Aracati, Brazil",http://www.meetup.com/PyladeisAracati/
3,"Atlanta, GA",http://www.meetup.com/pyladiesATL/
4,"Austin, TX",http://www.meetup.com/pyladies-atx/


## Get information from Meetup pages that exist.

In [59]:
ix = 30
meetup_url = df.dropna(subset=['meetup_url'])['meetup_url'].iloc[ix]
meetup_url

'http://www.meetup.com/nyc-pyladies/'

In [102]:
num_mems = []
index = []
for ix, row in df.iterrows():
    print(ix)
    row_url = row['meetup_url']
    if not pd.isnull(row_url):
        index.append(ix)
        row_request = requests.get(row_url)
        row_soup = BeautifulSoup(row_request.text, 'html.parser')
        if len(row_soup.find_all(text=" The Meetup Group you're looking for doesn't exist. ")) == 1:
            num_members = np.nan
        else:
            num_members = int(row_soup.find_all(class_="groupHomeHeaderInfo-memberLink")[0].text.split(' members')[0].replace(',',''))
        num_mems.append(num_members)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


IndexError: list index out of range

In [105]:
num_mems

[295,
 nan,
 nan,
 1021,
 nan,
 190,
 1139,
 1256,
 298,
 74,
 nan,
 1060,
 nan,
 630,
 291,
 121,
 94,
 277,
 nan,
 307,
 138,
 nan,
 1592,
 772,
 11]

In [91]:
row_soup.find_all('h2')

[<h2 class="desc big">
 neighbors getting together to learn something, do something, share something…
 </h2>,
 <h2> The Meetup Group you're looking for doesn't exist. </h2>,
 <h2 class="text--display2 margin-bottom margin--bottom align-center align--center">
 				Sign up
 			</h2>]

In [71]:
for meetup_url in df.dropna(subset=['meetup_url'])['meetup_url']:
    meetup_request = requests.get(meetup_url)
    meetup_soup = BeautifulSoup(meetup_request.text, 'html.parser')
    num_members = int(meetup_soup.find_all(class_="groupHomeHeaderInfo-memberLink")[0].text.split(' members')[0].replace(',',''))


SyntaxError: unexpected EOF while parsing (<ipython-input-71-19c87192a74f>, line 1)

In [61]:
meetup_request = requests.get(meetup_url)
meetup_soup = BeautifulSoup(meetup_request.text, 'html.parser')

In [62]:
num_members = int(meetup_soup.find_all(class_="groupHomeHeaderInfo-memberLink")[0].text.split(' members')[0].replace(',',''))
print('There are {} members on the meetup page'.format(num_members))

There are 3225 members on the meetup page


In [63]:
events = meetup_soup.find_all(class_="text--sectionTitle text--bold padding--bottom")
events

[<h3 class="text--sectionTitle text--bold padding--bottom"><span>Next Meetup</span></h3>,
 <h3 class="text--sectionTitle text--bold padding--bottom"><span>Past Meetups (126)</span></h3>]

In [64]:
num_past = int([f for f in events if "Past" in str(f)][0].text.split('(')[1].split(')')[0])
print('They have hosted {} events'.format(num_past))

They have hosted 126 events


In [65]:
[f for f in events if "Next" in str(f)][0].text

'Next Meetup'

In [66]:
meetup_soup

<!DOCTYPE html>
<html data-reactroot="" lang="en"><head><title data-react-helmet="true">NYC PyLadies (New York, NY) | Meetup</title><meta content="width=device-width, initial-scale=1" data-react-helmet="true" name="viewport"/><meta content="text/html; charset=utf-8" data-react-helmet="true" http-equiv="Content-Type"/><meta content="IE=edge" data-react-helmet="true" http-equiv="X-UA-Compatible"/><meta content="h5EhuAEkLFlZmMxwpH5wnRaoDEmqYCCEUE+FLcrRNvE=" data-react-helmet="true" name="verify-v1"/><meta content="index, follow" data-react-helmet="true" name="robots"/><meta content="PyLadies• Mission (http://www.pyladies.com)• Code of Conduct (http://www.pyladies.com/CodeOfConduct/)A group for Python ladies and non-binary people of all levels of programming experience, in the NYC" data-react-helmet="true" name="description"/><meta content="Web Standards,Internet Professionals,Web Technology,Web Development,Software Development,Open Source,Web Design,Computer programming,Python,PyLadies,NY