In [1]:
# Import dependencies
from bs4 import BeautifulSoup as bs
import requests
import re
import pandas as pd
import numpy as np

In [2]:
# Create a list for each data field on a Glassdoor review
review_title = []
employee_tenure = []
reviewer_profile = []
overall_rating_number = []
work_life_balance = []
culture_and_values = []
diversity_and_inclusion = []
career_opportunities = []
compensation_and_benefits = []
senior_management = []
pros = []
cons = []
recommend = []
ceo_approval = []
business_outlook = []

# The first page of the Microsfoft Glassdoor review
first_page = 'https://www.glassdoor.com/Reviews/Microsoft-Reviews-E1651.htm'

response = requests.get(first_page)
glassdoor_soup = bs(response.text, 'html.parser')

# Find the total pages for the review section
footer_text = glassdoor_soup.find('div', class_='paginationFooter').text
review_string_list = footer_text.split()
total_reviews = int(review_string_list[5].replace(',', ''))
total_pages = np.ceil((total_reviews/10)+1).astype(int)
print(f'Total Pages to be scraped: {total_pages}')
print("Beginning Scraping...")
# Begin looping through each of the URL review pages
for i in range(1, total_pages):
    
    # If we are not on the first page, then use a URL formatted for pages beyond the first
    if i != 1:
        url = 'https://www.glassdoor.com/Reviews/Microsoft-Reviews-E1651_P'+str(i)+'.htm?filter.iso3Language=eng'
    else:
        url = first_page

    # Use BeautifulSoup to get the needed HTML page
    response = requests.get(url)
    glassdoor_soup = bs(response.text, 'html.parser')
    glassdoor_body = glassdoor_soup.find('div', id = 'ReviewsFeed')
    glassdoor_reviews = glassdoor_body.find_all('li', class_ = 'empReview')
    
    # Start scanning through the webpage element's for the needed data points
    for review in glassdoor_reviews:
        
        # Title of the review
        review_header = review.find_all('a', class_ = 'reviewLink')
        for header in review_header:
            review_title.append(header.text)
        
        # Date the review was posted
        employee_time = review.find_all('span', class_ = 'pt-xsm pt-md-0 css-1qxtz39 eg4psks0')
        for time in employee_time:
            employee_tenure.append(time.text)
        
        # Includes position, location, and if they are still employed by the company
        review_profile = review.find_all('span', class_ = 'common__EiReviewDetailsStyle__newUiJobLine')
        for profile in review_profile:
            reviewer_profile.append(profile.text)
        
        # Overall rating left by the reviewer
        employee_rating = review.find_all('span', class_ = 'ratingNumber mr-xsm')
        for rating in employee_rating:
            overall_rating_number.append(rating.text)
        
        # Positive comments left by the reviewer
        pro_ratings = review.find_all('span', attrs={'data-test': 'pros'})
        for rating in pro_ratings:
            pros.append(rating.text)
        
        # Critical feedback of the position
        con_ratings = review.find_all('span', attrs={'data-test': 'cons'})
        for rating in con_ratings:
            cons.append(rating.text)
        
        # Navigate through the additional review details dropdown menu
        dropdown_ratings = review.find_all('div', class_='tooltipContainer')
        flat_ratings = review.find_all('div', class_='d-flex flex-column align-items-start')
        
        # If there is a dropdown menu, then proceed, else move on to using the overall rating for the dropdown ratings
        if dropdown_ratings:
            for dropdown in dropdown_ratings:
                
                toolbars = dropdown.find_all('ul', class_='pl-0')
                for bars in toolbars:
                    
                    # Set up a counter to see if ratings were posted by the reviewer
                    work_life_counter = 0
                    culture_and_values_counter = 0
                    diversity_and_inclusion_counter = 0
                    career_opportunities_counter = 0
                    compensation_and_benefits_counter = 0
                    senior_management_counter = 0
                    
                    # Find every list item within a dropdown menu if they exist
                    work_life = bars.find_all('li')
                    
                    # Loop through each existing item
                    for a, wl in enumerate(work_life):
                        
                        # CSS class tag assigned to ratings based on a 1-5 scale
                        one_star = r'css.1mfncox'
                        two_stars = r'css.1lp3h8x'
                        three_stars = r'css.k58126'
                        four_stars = r'css.94nhxw'
                        five_stars = r'css.11w4osi'
                        
                        # Scan the list item to see if "Work/Life Balance" is within the HTML element
                        if re.search('Work', str(wl)):
                            
                            # If it is a work/life balance rating, increase the counter by 1
                            work_life_counter = 1
                            
                            # Scan the HTML element for the CSS class tags listed above  
                            if re.search(one_star, str(wl)):
                                work_life_balance.append(1)
                            elif re.search(two_stars, str(wl)):
                                work_life_balance.append(2)
                            elif re.search(three_stars, str(wl)):
                                work_life_balance.append(3)
                            elif re.search(four_stars, str(wl)):
                                work_life_balance.append(4)
                            elif re.search(five_stars, str(wl)):
                                work_life_balance.append(5)
                            
                            
                        elif re.search('Culture', str(wl)):
                            culture_and_values_counter = 1
                            if re.search(one_star, str(wl)):
                                culture_and_values.append(1)
                            elif re.search(two_stars, str(wl)):
                                culture_and_values.append(2)
                            elif re.search(three_stars, str(wl)):
                                culture_and_values.append(3)
                            elif re.search(four_stars, str(wl)):
                                culture_and_values.append(4)
                            elif re.search(five_stars, str(wl)):
                                culture_and_values.append(5)
                        
                            
                        elif re.search('Diversity', str(wl)):
                            diversity_and_inclusion_counter = 1
                            if re.search(one_star, str(wl)):
                                diversity_and_inclusion.append(1)
                            elif re.search(two_stars, str(wl)):
                                diversity_and_inclusion.append(2)
                            elif re.search(three_stars, str(wl)):
                                diversity_and_inclusion.append(3)
                            elif re.search(four_stars, str(wl)):
                                diversity_and_inclusion.append(4)
                            elif re.search(five_stars, str(wl)):
                                diversity_and_inclusion.append(5)
                            
                        elif re.search('Career', str(wl)):
                            career_opportunities_counter = 1
                            if re.search(one_star, str(wl)):
                                career_opportunities.append(1)
                            elif re.search(two_stars, str(wl)):
                                career_opportunities.append(2)
                            elif re.search(three_stars, str(wl)):
                                career_opportunities.append(3)
                            elif re.search(four_stars, str(wl)):
                                career_opportunities.append(4)
                            elif re.search(five_stars, str(wl)):
                                career_opportunities.append(5)
                            
                        elif re.search('Compensation', str(wl)):
                            compensation_and_benefits_counter = 1
                            if re.search(one_star, str(wl)):
                                compensation_and_benefits.append(1)
                            elif re.search(two_stars, str(wl)):
                                compensation_and_benefits.append(2)
                            elif re.search(three_stars, str(wl)):
                                compensation_and_benefits.append(3)
                            elif re.search(four_stars, str(wl)):
                                compensation_and_benefits.append(4)
                            elif re.search(five_stars, str(wl)):
                                compensation_and_benefits.append(5)
                            
                        elif re.search('Senior', str(wl)):
                            senior_management_counter = 1
                            if re.search(one_star, str(wl)):
                                senior_management.append(1)
                            elif re.search(two_stars, str(wl)):
                                senior_management.append(2)
                            elif re.search(three_stars, str(wl)):
                                senior_management.append(3)
                            elif re.search(four_stars, str(wl)):
                                senior_management.append(4)
                            elif re.search(five_stars, str(wl)):
                                senior_management.append(5)
                        
                        # If there is an error, the title and error will be printed
                        else:
                            print(header.text)
                            print('---------Error Here ---------')
                    
                    # If the counter for a rating is equal to 0, append "N/A" for that datapoint
                    if work_life_counter == 0:
                        work_life_balance.append('N/A')
                    if culture_and_values_counter == 0:
                        culture_and_values.append('N/A')
                    if diversity_and_inclusion_counter == 0:
                        diversity_and_inclusion.append('N/A')
                    if career_opportunities_counter == 0:
                        career_opportunities.append('N/A')
                    if compensation_and_benefits_counter == 0:
                        compensation_and_benefits.append('N/A')
                    if senior_management_counter == 0:
                        senior_management.append('N/A')
        
        # If there is no dropdown menu, use the overall rating as the datapoint for each dropdown rating
        elif flat_ratings:
            
            for flat in flat_ratings:
                
                flat_rate_number = flat.find_all('span', class_ = 'ratingNumber')
                
                for num in flat_rate_number:
                    
                    work_life_balance.append(int(num.text[:1]))
                    culture_and_values.append(int(num.text[:1]))
                    diversity_and_inclusion.append(int(num.text[:1]))
                    career_opportunities.append(int(num.text[:1]))
                    compensation_and_benefits.append(int(num.text[:1]))
                    senior_management.append(int(num.text[:1]))
        
        # If there is an error, or the overall rating cannot be found, put an error for each data point.
        else:
            work_life_balance.append('Error')
            culture_and_values.append('Error')
            diversity_and_inclusion.append('Error')
            career_opportunities.append('Error')
            compensation_and_benefits.append('Error')
            senior_management.append('Error')
        
        # Find the element holding the CEO Approval, Recommend, and Business Outlook reivews
        review_mark = review.find_all('div', class_ = 'd-flex align-items-center mr-std')
        
        # Find the individual reviews
        for mark in review_mark:            
            
            fill_color = mark.find_all('span', class_ = 'SVGInline')
            
            for color in fill_color:
                
                # The CSS class assigned to the different answer choices for the ratings
                checkmark = {'class': ['SVGInline', 'css-hcqxoa', 'd-flex']}
                cross = {'class': ['SVGInline', 'css-1kiw93k', 'd-flex']}
                indifferent = {'class': ['SVGInline', 'css-1h93d4v', 'd-flex']}
                no_answer = {'class': ['SVGInline', 'css-10xv9lv', 'd-flex']}
                
                review_type = mark.find_all('span')
                
                # Distinguish in which datapoint the element belongs
                for review in review_type:
                    if review.text == 'Recommend':
                        if color.attrs == checkmark:
                            recommend.append('Yes')
                        elif color.attrs == cross:
                            recommend.append('No')
                        elif color.attrs == indifferent:
                            recommend.append('Indifferent')
                        elif color.attrs == no_answer:
                            recommend.append('No answer')
                    if review.text == 'CEO Approval':
                        if color.attrs == checkmark:
                            ceo_approval.append('Approve')
                        elif color.attrs == cross:
                            ceo_approval.append('Disapprove')
                        elif color.attrs == indifferent:
                            ceo_approval.append('Indifferent')
                        elif color.attrs == no_answer:
                            ceo_approval.append('No answer')
                    if review.text == 'Business Outlook':
                        if color.attrs == checkmark:
                            business_outlook.append('Good')
                        elif color.attrs == cross:
                            business_outlook.append('Bad')
                        elif color.attrs == indifferent:
                            business_outlook.append('Neutral')
                        elif color.attrs == no_answer:
                            business_outlook.append('No answer')
    print(f'Page {i} is complete')
print('Scraping Complete')

Total Pages to be scraped: 3579
Beginning Scraping...
Page 1 is complete
Page 2 is complete
Page 3 is complete
Page 4 is complete
Page 5 is complete
Page 6 is complete
Page 7 is complete
Page 8 is complete
Page 9 is complete
Page 10 is complete
Page 11 is complete
Page 12 is complete
Page 13 is complete
Page 14 is complete
Page 15 is complete
Page 16 is complete
Page 17 is complete
Page 18 is complete
Page 19 is complete
Page 20 is complete
Page 21 is complete
Page 22 is complete
Page 23 is complete
Page 24 is complete
Page 25 is complete
Page 26 is complete
Page 27 is complete
Page 28 is complete
Page 29 is complete
Page 30 is complete
Page 31 is complete
Page 32 is complete
Page 33 is complete
Page 34 is complete
Page 35 is complete
Page 36 is complete
Page 37 is complete
Page 38 is complete
Page 39 is complete
Page 40 is complete
Page 41 is complete
Page 42 is complete
Page 43 is complete
Page 44 is complete
Page 45 is complete
Page 46 is complete
Page 47 is complete
Page 48 is comp

Page 394 is complete
Page 395 is complete
Page 396 is complete
Page 397 is complete
Page 398 is complete
Page 399 is complete
Page 400 is complete
Page 401 is complete
Page 402 is complete
Page 403 is complete
Page 404 is complete
Page 405 is complete
Page 406 is complete
Page 407 is complete
Page 408 is complete
Page 409 is complete
Page 410 is complete
Page 411 is complete
Page 412 is complete
Page 413 is complete
Page 414 is complete
Page 415 is complete
Page 416 is complete
Page 417 is complete
Page 418 is complete
Page 419 is complete
Page 420 is complete
Page 421 is complete
Page 422 is complete
Page 423 is complete
Page 424 is complete
Page 425 is complete
Page 426 is complete
Page 427 is complete
Page 428 is complete
Page 429 is complete
Page 430 is complete
Page 431 is complete
Page 432 is complete
Page 433 is complete
Page 434 is complete
Page 435 is complete
Page 436 is complete
Page 437 is complete
Page 438 is complete
Page 439 is complete
Page 440 is complete
Page 441 is c

Page 785 is complete
Page 786 is complete
Page 787 is complete
Page 788 is complete
Page 789 is complete
Page 790 is complete
Page 791 is complete
Page 792 is complete
Page 793 is complete
Page 794 is complete
Page 795 is complete
Page 796 is complete
Page 797 is complete
Page 798 is complete
Page 799 is complete
Page 800 is complete
Page 801 is complete
Page 802 is complete
Page 803 is complete
Page 804 is complete
Page 805 is complete
Page 806 is complete
Page 807 is complete
Page 808 is complete
Page 809 is complete
Page 810 is complete
Page 811 is complete
Page 812 is complete
Page 813 is complete
Page 814 is complete
Page 815 is complete
Page 816 is complete
Page 817 is complete
Page 818 is complete
Page 819 is complete
Page 820 is complete
Page 821 is complete
Page 822 is complete
Page 823 is complete
Page 824 is complete
Page 825 is complete
Page 826 is complete
Page 827 is complete
Page 828 is complete
Page 829 is complete
Page 830 is complete
Page 831 is complete
Page 832 is c

Page 1168 is complete
Page 1169 is complete
Page 1170 is complete
Page 1171 is complete
Page 1172 is complete
Page 1173 is complete
Page 1174 is complete
Page 1175 is complete
Page 1176 is complete
Page 1177 is complete
Page 1178 is complete
Page 1179 is complete
Page 1180 is complete
Page 1181 is complete
Page 1182 is complete
Page 1183 is complete
Page 1184 is complete
Page 1185 is complete
Page 1186 is complete
Page 1187 is complete
Page 1188 is complete
Page 1189 is complete
Page 1190 is complete
Page 1191 is complete
Page 1192 is complete
Page 1193 is complete
Page 1194 is complete
Page 1195 is complete
Page 1196 is complete
Page 1197 is complete
Page 1198 is complete
Page 1199 is complete
Page 1200 is complete
Page 1201 is complete
Page 1202 is complete
Page 1203 is complete
Page 1204 is complete
Page 1205 is complete
Page 1206 is complete
Page 1207 is complete
Page 1208 is complete
Page 1209 is complete
Page 1210 is complete
Page 1211 is complete
Page 1212 is complete
Page 1213 

Page 1541 is complete
Page 1542 is complete
Page 1543 is complete
Page 1544 is complete
Page 1545 is complete
Page 1546 is complete
Page 1547 is complete
Page 1548 is complete
Page 1549 is complete
Page 1550 is complete
Page 1551 is complete
Page 1552 is complete
Page 1553 is complete
Page 1554 is complete
Page 1555 is complete
Page 1556 is complete
Page 1557 is complete
Page 1558 is complete
Page 1559 is complete
Page 1560 is complete
Page 1561 is complete
Page 1562 is complete
Page 1563 is complete
Page 1564 is complete
Page 1565 is complete
Page 1566 is complete
Page 1567 is complete
Page 1568 is complete
Page 1569 is complete
Page 1570 is complete
Page 1571 is complete
Page 1572 is complete
Page 1573 is complete
Page 1574 is complete
Page 1575 is complete
Page 1576 is complete
Page 1577 is complete
Page 1578 is complete
Page 1579 is complete
Page 1580 is complete
Page 1581 is complete
Page 1582 is complete
Page 1583 is complete
Page 1584 is complete
Page 1585 is complete
Page 1586 

Page 1914 is complete
Page 1915 is complete
Page 1916 is complete
Page 1917 is complete
Page 1918 is complete
Page 1919 is complete
Page 1920 is complete
Page 1921 is complete
Page 1922 is complete
Page 1923 is complete
Page 1924 is complete
Page 1925 is complete
Page 1926 is complete
Page 1927 is complete
Page 1928 is complete
Page 1929 is complete
Page 1930 is complete
Page 1931 is complete
Page 1932 is complete
Page 1933 is complete
Page 1934 is complete
Page 1935 is complete
Page 1936 is complete
Page 1937 is complete
Page 1938 is complete
Page 1939 is complete
Page 1940 is complete
Page 1941 is complete
Page 1942 is complete
Page 1943 is complete
Page 1944 is complete
Page 1945 is complete
Page 1946 is complete
Page 1947 is complete
Page 1948 is complete
Page 1949 is complete
Page 1950 is complete
Page 1951 is complete
Page 1952 is complete
Page 1953 is complete
Page 1954 is complete
Page 1955 is complete
Page 1956 is complete
Page 1957 is complete
Page 1958 is complete
Page 1959 

Page 2287 is complete
Page 2288 is complete
Page 2289 is complete
Page 2290 is complete
Page 2291 is complete
Page 2292 is complete
Page 2293 is complete
Page 2294 is complete
Page 2295 is complete
Page 2296 is complete
Page 2297 is complete
Page 2298 is complete
Page 2299 is complete
Page 2300 is complete
Page 2301 is complete
Page 2302 is complete
Page 2303 is complete
Page 2304 is complete
Page 2305 is complete
Page 2306 is complete
Page 2307 is complete
Page 2308 is complete
Page 2309 is complete
Page 2310 is complete
Page 2311 is complete
Page 2312 is complete
Page 2313 is complete
Page 2314 is complete
Page 2315 is complete
Page 2316 is complete
Page 2317 is complete
Page 2318 is complete
Page 2319 is complete
Page 2320 is complete
Page 2321 is complete
Page 2322 is complete
Page 2323 is complete
Page 2324 is complete
Page 2325 is complete
Page 2326 is complete
Page 2327 is complete
Page 2328 is complete
Page 2329 is complete
Page 2330 is complete
Page 2331 is complete
Page 2332 

Page 2660 is complete
Page 2661 is complete
Page 2662 is complete
Page 2663 is complete
Page 2664 is complete
Page 2665 is complete
Page 2666 is complete
Page 2667 is complete
Page 2668 is complete
Page 2669 is complete
Page 2670 is complete
Page 2671 is complete
Page 2672 is complete
Page 2673 is complete
Page 2674 is complete
Page 2675 is complete
Page 2676 is complete
Page 2677 is complete
Page 2678 is complete
Page 2679 is complete
Page 2680 is complete
Page 2681 is complete
Page 2682 is complete
Page 2683 is complete
Page 2684 is complete
Page 2685 is complete
Page 2686 is complete
Page 2687 is complete
Page 2688 is complete
Page 2689 is complete
Page 2690 is complete
Page 2691 is complete
Page 2692 is complete
Page 2693 is complete
Page 2694 is complete
Page 2695 is complete
Page 2696 is complete
Page 2697 is complete
Page 2698 is complete
Page 2699 is complete
Page 2700 is complete
Page 2701 is complete
Page 2702 is complete
Page 2703 is complete
Page 2704 is complete
Page 2705 

Page 3033 is complete
Page 3034 is complete
Page 3035 is complete
Page 3036 is complete
Page 3037 is complete
Page 3038 is complete
Page 3039 is complete
Page 3040 is complete
Page 3041 is complete
Page 3042 is complete
Page 3043 is complete
Page 3044 is complete
Page 3045 is complete
Page 3046 is complete
Page 3047 is complete
Page 3048 is complete
Page 3049 is complete
Page 3050 is complete
Page 3051 is complete
Page 3052 is complete
Page 3053 is complete
Page 3054 is complete
Page 3055 is complete
Page 3056 is complete
Page 3057 is complete
Page 3058 is complete
Page 3059 is complete
Page 3060 is complete
Page 3061 is complete
Page 3062 is complete
Page 3063 is complete
Page 3064 is complete
Page 3065 is complete
Page 3066 is complete
Page 3067 is complete
Page 3068 is complete
Page 3069 is complete
Page 3070 is complete
Page 3071 is complete
Page 3072 is complete
Page 3073 is complete
Page 3074 is complete
Page 3075 is complete
Page 3076 is complete
Page 3077 is complete
Page 3078 

Page 3406 is complete
Page 3407 is complete
Page 3408 is complete
Page 3409 is complete
Page 3410 is complete
Page 3411 is complete
Page 3412 is complete
Page 3413 is complete
Page 3414 is complete
Page 3415 is complete
Page 3416 is complete
Page 3417 is complete
Page 3418 is complete
Page 3419 is complete
Page 3420 is complete
Page 3421 is complete
Page 3422 is complete
Page 3423 is complete
Page 3424 is complete
Page 3425 is complete
Page 3426 is complete
Page 3427 is complete
Page 3428 is complete
Page 3429 is complete
Page 3430 is complete
Page 3431 is complete
Page 3432 is complete
Page 3433 is complete
Page 3434 is complete
Page 3435 is complete
Page 3436 is complete
Page 3437 is complete
Page 3438 is complete
Page 3439 is complete
Page 3440 is complete
Page 3441 is complete
Page 3442 is complete
Page 3443 is complete
Page 3444 is complete
Page 3445 is complete
Page 3446 is complete
Page 3447 is complete
Page 3448 is complete
Page 3449 is complete
Page 3450 is complete
Page 3451 

In [3]:
# Create a zipped list from all of the data gather thus far 
glassdoor_data = list(zip(review_title, employee_tenure,
                            reviewer_profile, overall_rating_number, work_life_balance, culture_and_values,
                            diversity_and_inclusion, career_opportunities, compensation_and_benefits,
                            senior_management, pros, cons, recommend,
                            ceo_approval, business_outlook))

# Create a dataframe from the zipped list
glassdoor_df = pd.DataFrame(glassdoor_data, columns = 
                            ['Review Title','Reviewer Position',
                             'Review Profile','Rating', 'Work-Life Balance', 'Culture and Values', 
                             'Diversity and Inclusion', 'Career opportunities', 'Compensation and Benefits',
                             'Senior Management', 'Pros','Cons',
                             'Recommend','CEO Approval','Business Outlook'])

# Have the index start at one for personal aesthetic reasons
glassdoor_df.index += 1

# Create a copy of the DataFrame for manipulation
glassdoor_manip_df = glassdoor_df.copy()

glassdoor_manip_df

Unnamed: 0,Review Title,Reviewer Position,Review Profile,Rating,Work-Life Balance,Culture and Values,Diversity and Inclusion,Career opportunities,Compensation and Benefits,Senior Management,Pros,Cons,Recommend,CEO Approval,Business Outlook
1,Microsoft is great company,"Current Employee, more than 1 year","Jan 7, 2023 - Software Engineer in Redmond, WA",5.0,5,5,5,5,4,5,I like the work life balance,No cons as of now,Yes,Approve,No answer
2,Thoughts after 10 years....,"Current Employee, more than 10 years","Jan 28, 2013 - Anonymous Employee in Redmond, WA",4.0,4,2,,2,4,,"1. If you love tech, this is a great place. ...",Brand on Your Resume: After many years of los...,No answer,No answer,No answer
3,Great company!,"Current Employee, more than 3 years","Dec 27, 2022 - Senior Customer Success Account...",4.0,4,5,5,5,5,5,I love working at Microsoft. We have great cul...,PTO - We have 3 weeks of PTO which is good com...,Yes,Approve,Good
4,Company takes care of their people!,"Current Employee, more than 1 year","Jan 5, 2023 - Sr Program Manager in Seattle, WA",5.0,5,5,5,5,5,5,"The ""deal"" is incredible from salary to benefi...",It is sometimes difficult to see what your pat...,Yes,Approve,Good
5,The cons are of my former team NOT the company...,"Former Employee, more than 1 year","Dec 27, 2022 - Digital Sales Specialist",5.0,5,5,5,5,5,1,"Great pay, generous training curve, tuition re...",My former manager was incompetent and not the ...,No answer,No answer,No answer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34444,Working at Microsoft,Former Employee,"Jan 20, 2016 -",4.0,3,4,4,4,1,2,Work life is great. You need to work hard.,Everything is cool and you should keep on work...,No answer,No answer,No answer
34445,Good company ro work for,Current Employee,"Feb 8, 2016 -",4.0,5,3,,4,3,3,"Exciting technology good package, flexible wor...",Can be rigid especially when working with the ...,Yes,Approve,Neutral
34446,HR Manager,"Current Employee, more than 3 years","Feb 13, 2016 - Human Resources Manager in Sing...",4.0,4,3,,5,4,4,Great opportunities for those that are driven ...,Career path is not always clear.,Yes,No answer,Good
34447,Summer Analyst,Former Employee,"Feb 20, 2016 - Intern in Milan",4.0,5,4,,5,3,4,"Good working environment, Friendly people, Wel...","Sometimes lack of organisation, not competitiv...",Yes,Approve,No answer


In [4]:
# Split the Reviewer Position datafield into their current status and duration of employement
glassdoor_manip_df[['Employee Status', 'Duration of Employment']] = glassdoor_manip_df['Reviewer Position'].str.split(', ', 1, expand=True)

# Split the Reviewer Profile into the date the review was made and location in which they worked
glassdoor_manip_df[['Review Date', 'Reviewer Position/Location']] = glassdoor_manip_df['Review Profile'].str.split(' - ', 1, expand=True)

# Split the Reviewer Position/Location by their Role and Location
glassdoor_manip_df[['Reviewer Role', 'Reviewer Location']] = glassdoor_manip_df['Reviewer Position/Location'].str.split('in ', 1, expand=True)

# Split the Reviewer location up by City and State
glassdoor_manip_df[['City', 'State']] = glassdoor_manip_df['Reviewer Location'].str.split(', ', 1, expand=True)

glassdoor_manip_df

Unnamed: 0,Review Title,Reviewer Position,Review Profile,Rating,Work-Life Balance,Culture and Values,Diversity and Inclusion,Career opportunities,Compensation and Benefits,Senior Management,...,CEO Approval,Business Outlook,Employee Status,Duration of Employment,Review Date,Reviewer Position/Location,Reviewer Role,Reviewer Location,City,State
1,Microsoft is great company,"Current Employee, more than 1 year","Jan 7, 2023 - Software Engineer in Redmond, WA",5.0,5,5,5,5,4,5,...,Approve,No answer,Current Employee,more than 1 year,"Jan 7, 2023","Software Engineer in Redmond, WA",Software Engineer,"Redmond, WA",Redmond,WA
2,Thoughts after 10 years....,"Current Employee, more than 10 years","Jan 28, 2013 - Anonymous Employee in Redmond, WA",4.0,4,2,,2,4,,...,No answer,No answer,Current Employee,more than 10 years,"Jan 28, 2013","Anonymous Employee in Redmond, WA",Anonymous Employee,"Redmond, WA",Redmond,WA
3,Great company!,"Current Employee, more than 3 years","Dec 27, 2022 - Senior Customer Success Account...",4.0,4,5,5,5,5,5,...,Approve,Good,Current Employee,more than 3 years,"Dec 27, 2022",Senior Customer Success Account Manager in Cha...,Senior Customer Success Account Manager,"Charlotte, NC",Charlotte,NC
4,Company takes care of their people!,"Current Employee, more than 1 year","Jan 5, 2023 - Sr Program Manager in Seattle, WA",5.0,5,5,5,5,5,5,...,Approve,Good,Current Employee,more than 1 year,"Jan 5, 2023","Sr Program Manager in Seattle, WA",Sr Program Manager,"Seattle, WA",Seattle,WA
5,The cons are of my former team NOT the company...,"Former Employee, more than 1 year","Dec 27, 2022 - Digital Sales Specialist",5.0,5,5,5,5,5,1,...,No answer,No answer,Former Employee,more than 1 year,"Dec 27, 2022",Digital Sales Specialist,Digital Sales Specialist,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34444,Working at Microsoft,Former Employee,"Jan 20, 2016 -",4.0,3,4,4,4,1,2,...,No answer,No answer,Former Employee,,"Jan 20, 2016",,,,,
34445,Good company ro work for,Current Employee,"Feb 8, 2016 -",4.0,5,3,,4,3,3,...,Approve,Neutral,Current Employee,,"Feb 8, 2016",,,,,
34446,HR Manager,"Current Employee, more than 3 years","Feb 13, 2016 - Human Resources Manager in Sing...",4.0,4,3,,5,4,4,...,No answer,Good,Current Employee,more than 3 years,"Feb 13, 2016",Human Resources Manager in Singapore,Human Resources Manager,Singapore,Singapore,
34447,Summer Analyst,Former Employee,"Feb 20, 2016 - Intern in Milan",4.0,5,4,,5,3,4,...,Approve,No answer,Former Employee,,"Feb 20, 2016",Intern in Milan,Intern,Milan,Milan,


In [5]:
# Create an order to use newly created columns for the DataFrame
cols = ['Review Title', 'Rating', 'Work-Life Balance', 'Culture and Values', 'Diversity and Inclusion',
        'Career opportunities', 'Compensation and Benefits',
        'Senior Management','Reviewer Role', 'Employee Status', 'City', 'State', 'Duration of Employment',
        'Review Date', 'Pros', 'Cons', 'Recommend', 'CEO Approval', 'Business Outlook']

glassdoor_manip_df = glassdoor_manip_df[cols]

# Replace any blanks with 'N/A' and strip excess whitespace from the datapoints
glassdoor_manip_df['Reviewer Role'] = glassdoor_manip_df['Reviewer Role'].replace('', 'N/A', regex=True)
glassdoor_manip_df.columns = glassdoor_manip_df.columns.str.strip()

glassdoor_manip_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glassdoor_manip_df['Reviewer Role'] = glassdoor_manip_df['Reviewer Role'].replace('', 'N/A', regex=True)


Unnamed: 0,Review Title,Rating,Work-Life Balance,Culture and Values,Diversity and Inclusion,Career opportunities,Compensation and Benefits,Senior Management,Reviewer Role,Employee Status,City,State,Duration of Employment,Review Date,Pros,Cons,Recommend,CEO Approval,Business Outlook
1,Microsoft is great company,5.0,5,5,5,5,4,5,Software Engineer,Current Employee,Redmond,WA,more than 1 year,"Jan 7, 2023",I like the work life balance,No cons as of now,Yes,Approve,No answer
2,Thoughts after 10 years....,4.0,4,2,,2,4,,Anonymous Employee,Current Employee,Redmond,WA,more than 10 years,"Jan 28, 2013","1. If you love tech, this is a great place. ...",Brand on Your Resume: After many years of los...,No answer,No answer,No answer
3,Great company!,4.0,4,5,5,5,5,5,Senior Customer Success Account Manager,Current Employee,Charlotte,NC,more than 3 years,"Dec 27, 2022",I love working at Microsoft. We have great cul...,PTO - We have 3 weeks of PTO which is good com...,Yes,Approve,Good
4,Company takes care of their people!,5.0,5,5,5,5,5,5,Sr Program Manager,Current Employee,Seattle,WA,more than 1 year,"Jan 5, 2023","The ""deal"" is incredible from salary to benefi...",It is sometimes difficult to see what your pat...,Yes,Approve,Good
5,The cons are of my former team NOT the company...,5.0,5,5,5,5,5,1,Digital Sales Specialist,Former Employee,,,more than 1 year,"Dec 27, 2022","Great pay, generous training curve, tuition re...",My former manager was incompetent and not the ...,No answer,No answer,No answer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34444,Working at Microsoft,4.0,3,4,4,4,1,2,,Former Employee,,,,"Jan 20, 2016",Work life is great. You need to work hard.,Everything is cool and you should keep on work...,No answer,No answer,No answer
34445,Good company ro work for,4.0,5,3,,4,3,3,,Current Employee,,,,"Feb 8, 2016","Exciting technology good package, flexible wor...",Can be rigid especially when working with the ...,Yes,Approve,Neutral
34446,HR Manager,4.0,4,3,,5,4,4,Human Resources Manager,Current Employee,Singapore,,more than 3 years,"Feb 13, 2016",Great opportunities for those that are driven ...,Career path is not always clear.,Yes,No answer,Good
34447,Summer Analyst,4.0,5,4,,5,3,4,Intern,Former Employee,Milan,,,"Feb 20, 2016","Good working environment, Friendly people, Wel...","Sometimes lack of organisation, not competitiv...",Yes,Approve,No answer


In [6]:
# Save the DatFrame into a csv file  
glassdoor_manip_df.to_csv('Company Reviews\Microsoft Reviews.csv', header=True, index=False)