[View in Colaboratory](https://colab.research.google.com/github/gowtham91m/gofundme/blob/master/Scraper.ipynb)

In [0]:
%%capture
!pip install "requests[security]"

In [0]:
from bs4 import BeautifulSoup as bs
import requests
from IPython.display import display, clear_output
import re
import pandas as pd
from time import time
from collections import defaultdict

In [0]:
class web_scraper:
  """Collect GoFundMe campaign and donation data into pandas DataFrames.

  Pages are downloaded with ``requests`` and parsed with BeautifulSoup's
  ``html.parser``. Every CSS class name and endpoint below is hard-coded, so
  the scraper only works while the site keeps serving that markup.

  Args:
    url: Address of the page whose ``text-black`` elements name the categories
      (the notebook passes ``https://www.gofundme.com/discover``).
  """

  # <script> position the category id was originally read from; still tried first.
  _CID_SCRIPT_INDEX = 13
  # Matches the inline-JS fragment  cid=' + '<digits>  and captures the digits.
  _CID_PATTERN = re.compile(r"cid='\s\+\s'(\d+)")
  # A dollar amount such as "$950", "$676,152" or "$1,250,000"
  # (optional decimals / k-M suffix are accepted defensively).
  _AMOUNT_PATTERN = re.compile(r'\$\d+(?:,\d+)*(?:\.\d+)?[kKmM]?')
  # Scheme and host of an absolute URL, up to and including the first path slash.
  _URL_PREFIX_PATTERN = re.compile(r'^https?://[^/]+/')
  # Column order of the frame returned by scrape().
  _COLUMNS = ['category','name','href','location','raised','goal','text',
              'likes','shares','photos','donation_count','duration']

  def __init__(self,url):
    self.url = url

  def _get_soup(self, url):
    """Download `url` and return its body parsed with html.parser."""
    return bs(requests.get(url).text, 'html.parser')

  @staticmethod
  def _first_text(soup, css_class, default=0, strip=True):
    """Return the text of the first element with `css_class`, or `default` if none exists."""
    hits = soup.findAll(class_=css_class)
    if not hits:
      return default
    text = hits[0].text
    return text.strip() if strip else text

  @staticmethod
  def _parse_campaign_status(status):
    """Split a campaign-status line into (donation_count, duration).

    The third space-separated word is taken as the count and the last two
    words as the duration; (0, 0) is returned when there are fewer than three
    words.
    """
    words = status.strip().split(' ')
    if len(words) < 3:
      return 0, 0
    return words[2], ' '.join(words[-2:])

  @classmethod
  def _parse_goal(cls, text):
    """Return the second dollar amount found in `text`, or None if there are fewer than two.

    Unlike the former pattern (which required exactly one comma group), this
    also handles amounts below $1,000 and at or above $1,000,000.
    """
    amounts = cls._AMOUNT_PATTERN.findall(text)
    return amounts[1] if len(amounts) > 1 else None

  @classmethod
  def _campaign_slug(cls, href):
    """Strip the scheme and host from a campaign URL, leaving the path part."""
    return cls._URL_PREFIX_PATTERN.sub('', href)

  def _find_category_id(self, soup):
    """Return the numeric category id embedded in one of the page's <script> tags.

    Raises:
      IndexError: if no script contains the ``cid=' + '<digits>`` fragment.
    """
    scripts = soup.find_all('script')
    # Look at the original position first so the result is unchanged whenever
    # that script still carries the id, then fall back to scanning every script.
    candidates = scripts[self._CID_SCRIPT_INDEX:self._CID_SCRIPT_INDEX + 1] + list(scripts)
    for script in candidates:
      match = self._CID_PATTERN.search(script.text)
      if match:
        return match.group(1)
    raise IndexError('category id (cid) not found in any <script> tag')

  def get_categories(self, limit=1):
    """Return category names read from the ``text-black`` elements of ``self.url``.

    Args:
      limit: Maximum number of categories to return. Defaults to 1, the value
        that was previously hard-coded; pass None to return all of them.
    """
    # Use the URL given to the constructor, not a notebook-level global.
    soup = self._get_soup(self.url)
    categories = [tag.text for tag in soup.findAll(class_='text-black')]
    return categories[:limit]

  def details_parser(self,url):
    """Fetch one campaign page and extract its story text and counters.

    Returns:
      dict with keys 'text', 'likes', 'photos', 'shares', 'donation_count' and
      'duration'. A counter whose element is missing is 0; a missing story is
      replaced by a placeholder string containing `url`.
    """
    soup = self._get_soup(url)

    text = self._first_text(soup, "co-story truncate-text truncate-text--description js-truncate",
                            default='exception occured for' + url)
    # likes is the only field that is not whitespace-stripped.
    likes = self._first_text(soup, 'roundedNum', strip=False)
    photos = self._first_text(soup, 'open-media-viewer')
    shares = self._first_text(soup, 'js-share-count-text')

    status = self._first_text(soup, 'campaign-status text-small', default=None)
    if status is None:
      donation_count = duration = 0
    else:
      donation_count, duration = self._parse_campaign_status(status)

    return({'text':text, 'likes':likes, 'photos':photos, 'shares':shares, 'donation_count':donation_count, 'duration':duration})

  def scrape(self):
    """Scrape every listed campaign of each category returned by get_categories().

    For each category the listing page is fetched to find the category id,
    then the paginated ``load_more`` endpoint is read page by page until it
    returns an empty document or a page without campaigns. Each campaign page
    is fetched as well (one request per campaign).

    Returns:
      DataFrame with the columns in ``_COLUMNS``. The 'category' column holds
      the URL slug of the category. Row labels restart at 0 for every page, as
      they did before. An empty frame with the same columns is returned when
      nothing was scraped.
    """
    start_time = time()
    frames = []
    for category in self.get_categories():
      print(category,end='  ')
      slug = '-'.join(category.split(' '))
      slug = 'animal' if slug == 'Animals' else slug
      listing = self._get_soup('https://www.gofundme.com/discover/'+slug+'-fundraiser')
      cid = self._find_category_id(listing)
      page = 1
      while True:
        soup = self._get_soup('https://www.gofundme.com/mvc.php?route=categorypages/load_more&page='+str(page)+'&term=&cid='+cid)
        if len(soup) < 1:
          break
        name = [hit.text for hit in soup.findAll(attrs={'class' : 'fund-title truncate-single-line show-for-medium'})]
        href = [tile['href'] for tile in soup.findAll('a',attrs={'class':'campaign-tile-img--contain'})]
        # A non-empty response without any campaign means the listing is
        # exhausted; without this check the loop would never terminate.
        if not name and not href:
          break
        location = [item.text[1:-1] for item in soup.findAll(class_='fund-item fund-location truncate-single-line')]
        funding = soup.findAll(class_="fund-item truncate-single-line")
        # The <strong> text ends in a 7-character suffix that is dropped.
        raised = [item.findAll('strong')[0].text[:-7] for item in funding]
        goal = [self._parse_goal(item.text) for item in funding]

        details = defaultdict(list)
        for link in href:
          for key, value in self.details_parser(link).items():
            details[key].append(value)

        frames.append(pd.DataFrame({'category':[slug]*len(name),
                                    'name':name,
                                    'href':href,
                                    'location':location,
                                    'goal':goal,
                                    'raised':raised,
                                    'text':details['text'],
                                    'likes':details['likes'],
                                    'shares':details['shares'],
                                    'photos':details['photos'],
                                    'donation_count':details['donation_count'],
                                    'duration':details['duration']}))
        if page%10 == 0:
          print(page,end=' ')
        page += 1
      print('\n')
    clear_output()
    print('campaigns scrape time', time()-start_time)
    if not frames:
      return pd.DataFrame(columns=self._COLUMNS)
    # One concat at the end: DataFrame.append no longer exists in pandas 2.x
    # and re-copied the whole frame on every page.
    return pd.concat(frames)[self._COLUMNS]

  def get_donation_amount(self,df):
    """Fetch the donations returned by the ``type=recent`` endpoint for every campaign in `df`.

    Args:
      df: Frame with an ``href`` column of campaign URLs, e.g. the result of
        scrape().

    Returns:
      DataFrame with columns 'href', 'donation_amount' and 'time', one row per
      donation found. Only a single request (``idx=10``) is made per campaign.
    """
    start_time = time()
    frames = []
    for href in df.href:
      campaign = self._campaign_slug(href)
      url = 'https://www.gofundme.com/mvc.php?route=donate/pagingDonationsFoundation&url='+campaign+'&idx=10&type=recent'
      soup = self._get_soup(url)
      donation = [tag.text for tag in soup.findAll(class_='supporter-amount')]
      # Drops the last four characters of the time text (presumably " ago" -- TODO confirm).
      time_gap = [tag.text[:-4] for tag in soup.findAll(class_='supporter-time')]
      if donation or time_gap:
        frames.append(pd.DataFrame({'href':[href]*len(donation),
                                    'donation_amount':donation,
                                    'time':time_gap}))
    print('donation amount scrape time', time()-start_time)
    if not frames:
      return pd.DataFrame(columns=['href','donation_amount','time'])
    return pd.concat(frames)

In [25]:
url = 'https://www.gofundme.com/discover'
scraper = web_scraper(url)
campaign_data = scraper.scrape()
# A later cell displays donation_data, so it has to be built here for the
# notebook to survive Restart & Run All (one extra request per campaign).
donation_data = scraper.get_donation_amount(campaign_data)
# TODO: merge campaign_data and donation_data on href -- it is the only column
# the two frames have in common (donation_data has no name column).

campaigns scrape time 594.9056766033173


In [26]:
# Preview the first five scraped campaigns.
campaign_data.head(n=5)

Unnamed: 0,category,name,href,location,raised,goal,text,likes,shares,photos,donation_count,duration
0,Medical,Kdafoos ... Cancer ...,https://www.gofundme.com/kdafoos-cancer,"Houston, TX","$676,152","$500,000",I’vee been fighting cancer for the past 5 year...,2.4k,330,11,2408,1 month
1,Medical,92 Yr old Man Brutally Attacked.,https://www.gofundme.com/3ctqm-medical-bills-f...,"Los Angeles, CA","$327,345","$15,000","On July 4th at around 7pm, my grandfather Rodo...",12k,26k,4,12167,1 month
2,Medical,Olivia Stoy:Transplant & Liv it up!,https://www.gofundme.com/olivia-stoy-bone-marr...,"Ashley, IN","$316,226","$316,226",Update: Many fundraisers have been put into pl...,5.7k,12k,44,5597,3 months
3,Medical,AUTOLOGOUS T CELL TRANSPLANT,https://www.gofundme.com/autologous-Tcell-Tran...,"Staten Island, NY","$241,105","$250,000",43 year old Philip Defonte who is a husband & ...,835,1.8k,0,840,2 months
4,Medical,A chance of rebirth,https://www.gofundme.com/a-chance-of-rebirth,"Dublin, CA","$237,354","$225,000","Ganga was the life of every party, the person ...",4.7k,9.7k,10,4706,1 month


In [24]:
# Preview the first five scraped donations.
donation_data.head(n=5)

Unnamed: 0,donation_amount,href,time
0,$25,https://www.gofundme.com/kdafoos-cancer,1 month
1,$10,https://www.gofundme.com/kdafoos-cancer,1 month
2,$50,https://www.gofundme.com/kdafoos-cancer,1 month
3,$100,https://www.gofundme.com/kdafoos-cancer,1 month
4,$100,https://www.gofundme.com/kdafoos-cancer,1 month
