# In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import csv

def save_to_csv(data_list, filename, encoding='utf-8'):
    """Write *data_list* to *filename* as a CSV file with a fixed header row.

    Args:
        data_list: Iterable of dicts keyed by the column names listed in
            ``fieldnames`` below. A row may omit columns; ``csv.DictWriter``
            writes those cells empty. A key that is not one of the columns
            makes ``csv.DictWriter`` raise ``ValueError``.
        filename: Path of the file to create (an existing file is
            overwritten).
        encoding: Text encoding of the output file. Defaults to UTF-8 so
            that non-ASCII text is written the same way on every platform
            instead of depending on the locale's default encoding.

    Raises:
        ValueError: If a row contains a key that is not a known column.
        OSError: If the file cannot be opened for writing.
    """
    fieldnames = ['University', 'Program', 'Degree', 'Entrance Semester', 'Decision', 'Decision Date', 'GPA', 'GRE Verbal', 'GRE Writing', 'GRE', 'Date Added', 'Notes']

    # newline='' leaves line-ending handling to the csv module, which avoids
    # blank lines between rows on Windows.
    with open(filename, mode='w', newline='', encoding=encoding) as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data_list)

# GradCafe scraper: walks the paginated survey results for the query
# "psychology", turns every result element into a dict keyed by the CSV column
# names, and writes the collected rows to gradcafe_data.csv.

SURVEY_URL = "https://www.thegradcafe.com/survey/?per_page=40&q=psychology&institution=&program=&degree=&page={}"
FIRST_PAGE = 1
LAST_PAGE = 330  # inclusive
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks forever

# A decision date carries no year. If pairing it with the "Date Added" year
# puts it more than this many days after the entry was added, it is taken to
# belong to the previous year instead (e.g. a December decision that was
# reported in January).
MAX_FUTURE_DAYS = 31

# (badge label, CSV column). Order matters: "GRE" is a substring of the two
# more specific GRE labels, so those have to be tested first.
SCORE_LABELS = (("GPA", "GPA"), ("GRE V", "GRE Verbal"), ("GRE AW", "GRE Writing"), ("GRE", "GRE"))


def _squash(text):
    """Collapse every run of whitespace in *text* into a single space."""
    return ' '.join(text.split())


def _decision_date(day_month, added):
    """Attach a year to a "day month" decision date such as "12 Mar".

    Args:
        day_month: Day and abbreviated month name, without a year.
        added: ``datetime`` on which the entry was added; supplies the year.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or ``None`` when *day_month*
        cannot be parsed.
    """
    for year in (added.year, added.year - 1):
        try:
            candidate = datetime.strptime('{} {}'.format(day_month, year), '%d %b %Y')
        except ValueError:
            # Unparseable text, or "29 Feb" combined with a non-leap year.
            continue
        # Assumes a decision is reported on or after the day it happened;
        # the tolerance keeps near-simultaneous dates in the same year.
        if (candidate - added).days <= MAX_FUTURE_DAYS:
            return candidate.strftime('%Y-%m-%d')
    return None


def _parse_entry(element):
    """Convert one candidate element into a CSV row dict.

    Args:
        element: A BeautifulSoup tag matched by ``class_='col'``.

    Returns:
        A dict keyed by CSV column names, or ``None`` when *element* lacks the
        tags a result entry is expected to have (an ``h6`` with a ``span``, a
        ``p``, and at least two ``span.badge`` tags).

    Raises:
        ValueError: If the "Added on" date does not match ``%B %d, %Y``.
        IndexError: If the entrance-semester badge is empty.
    """
    heading = element.find('h6')
    if heading is None:
        return None
    note_tag = heading.find('span')
    added_tag = element.find('p')
    badges = element.find_all('span', {'class': 'badge'})
    if note_tag is None or added_tag is None or len(badges) < 2:
        return None

    row = {"Notes": note_tag.text}

    # Assumes the notes <span> is the tail of the heading text, so removing
    # that many trailing characters leaves "Program, University".
    heading_text = heading.text
    program_and_school = heading_text[:len(heading_text) - len(row["Notes"])]
    # partition() keeps the whole text as the program when there is no comma,
    # instead of slicing with the -1 that str.find() would return.
    program, _, university = program_and_school.partition(',')
    row['Program'] = program.strip()
    row['University'] = university.strip()

    added = datetime.strptime(_squash(added_tag.text).replace('Added on ', ''), '%B %d, %Y')
    row["Date Added"] = added.strftime('%Y-%m-%d')

    # First badge: "<decision>" or "<decision> on <day> <month>". Splitting on
    # " on " keeps multi-word decisions intact.
    decision, has_date, day_month = _squash(badges[0].text).partition(' on ')
    row["Decision"] = decision
    if has_date:
        resolved = _decision_date(day_month, added)
        if resolved is not None:
            row["Decision Date"] = resolved

    # Second badge: first character plus last two characters,
    # e.g. "Fall 2023" -> "F23".
    term = badges[1].text.strip()
    row["Entrance Semester"] = term[0] + term[-2:]

    # NOTE(review): with fewer than four badges, badges[-2] is the decision or
    # semester badge rather than a degree - confirm against the page layout.
    row["Degree"] = _squash(badges[-2].text)

    # Badges between the semester badge and the last two hold the scores.
    for badge in badges[2:-2]:
        badge_text = _squash(badge.text)
        for label, column in SCORE_LABELS:
            if label in badge_text:
                row[column] = badge_text.replace(label + ' ', '')
                break

    return row


def _scrape(pages):
    """Download and parse every page number in *pages*.

    A page that cannot be fetched is reported and skipped, so rows collected
    from other pages are still returned.

    Returns:
        ``(rows, skipped)``: the parsed row dicts and the number of elements
        that looked like result entries but could not be parsed.
    """
    rows = []
    skipped = 0
    # One session for the whole run so connections to the host are reused.
    with requests.Session() as session:
        for page in pages:
            try:
                response = session.get(SURVEY_URL.format(page), timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.RequestException as exc:
                print("Page {}: request failed ({}); skipping".format(page, exc))
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for element in soup.find_all(class_='col'):
                try:
                    row = _parse_entry(element)
                except (AttributeError, IndexError, ValueError):
                    # Malformed entry: count it and continue with the rest.
                    skipped += 1
                    continue
                if row is not None:
                    rows.append(row)
    return rows, skipped


data_list, skipped_entries = _scrape(range(FIRST_PAGE, LAST_PAGE + 1))
print("Collected {} rows; skipped {} malformed entries".format(len(data_list), skipped_entries))

# Save data to a CSV file
save_to_csv(data_list, 'gradcafe_data.csv')