In [4]:
import pandas as pd
import numpy as np
import time 
from datetime import datetime

from collections import Counter
import nltk
import string
from nltk.tokenize import word_tokenize

import math

from nltk import pos_tag
from nltk.stem import PorterStemmer

# read scraped info

excel = pd.read_excel('full_followers.xlsx')

# create a dataframe

new_df = excel[['id','candidate','name','screen_name','location','description','followers_count','friends_count','created_at','favourites_count','statuses_count']]

new_df['daily_avg_tweets'] = new_df.apply(lambda x: x.statuses_count/float((datetime.utcnow().date()-datetime.strptime(x.created_at, "%a %b %d  %H:%M:%S %z %Y").date()).days),axis=1)

new_df = new_df.replace(np.nan, '', regex=True)
     
# define jobs and categories
    
sub_categories = {
    'Managers': {'Hospitality','services','Production','specialized services','Administrative','commercial',
                 'Chief executives','senior officials','legislators','Retail','wholesale','Hotel','restaurant',
                 'Business','administration','Sales','marketing','Managing directors','directors',
                 'CEO','CTO','CFO','CMO','HR','MD','manager','team','lead','boss','client','founder','Chief',
                 'entrepreneur','startup','start-up','Human Resources','Bank','mpp'
                },
    
    'Professional': {'Science','engineering','Health','Teaching','Information','communications','technology','Legal',
                     'cultural','Financial','mathematical','Author','journalist','linguist','therapist','Librarian',
                     'archivist','curator','Database','network','Software','applications','write','library',
                     'developer','analyst','University','education','Veterinarian','medicine','Architect','planner',
                     'surveyor','designer','actuary','statistician','Life science','earth science','science',
                     'professional','photographer','editor','lawyer','scientist','Psychologist','advocate','photography',
                     'writer','nurse','physics','chemistry','mathematics','space','astronomy','geology','learn',
                     'learning','professor','Criminologist','teacher','specialist','Strategist','healthcare','Cybersecurity',
                     'cyber','engineer','research','reasearcher','Psychologist','Ecologist','Botanist','computing','computer',
                     'doctor','Educator','Barrister','Solicitor','Notary'
                    },
    
    'Technicians and Associate Professionals':{'associate','technical','paraLegal','cultural','Telecommunications',
                                               'broadcasting','Artistic','cultural','culinary','Paramedic',
                                               'Regulatory','government','purchasing agents','brokers','Financial'
                                               ,'Veterinary technicians','Traditional','complementary','Nursing','midwifery',
                                               'pharmaceutical','Ship','aircraft','controllers','Life science','Process control'
                                               ,'Mining','manufacturing','construction','store','camera','cameraman','consultant',
                                               'officer','committee','ministry','planner','educator','Designer','Graphic','Networker',
                                               'Sailor','ship','deck','crew','cab','Publisher'
                                              },
    
    'Clerical Support Workers':{'General','keyboard','office','Customer service','Numerical','material recording',
                                'Cashiers','Tellers','money collectors','Secretaries','collector','assistant','correspondent'
                               },
    
    'Service and Sales Workers':{'Personal service','Sales','Personal care','Protective services','Child care','Street',
                                 'Building','housekeeping','Hairdressers','beauticians','Waiters','bartenders','Cooks',
                                'Travel attendants','conductors','guides','service','caretakers','attendants','personal',
                                 'disabilities','insurance'
                                },
    
    'Skilled Agricultural, Forestry and Fishery workers':{ 'Market-oriented','forestry','fishery','hunt','Subsistence','crop',
                                                          'livestock','trapper','gatherer','farmer','grower','estate',
                                                          'agriculture','raising','Hunter','Fisherman'
                                                         },
    
    'Craft and related Trades Workers':{'Building','frame','finishers','Painters','Sheet','structural','metal',
                                        'workers','moulders','welders','Blacksmiths','toolmakers','Machinery','mechanics',
                                        'repairers','Handicraft','printing','Electrical','electronic','installers',
                                        'Food processing','wood working','garment','electrician','brewer','baker','craft','Woodworker'
                                       },
    
    'Plant and Machine Operators, and Assemblers':{'Stationary','machine','operators','mineral','processing',
                                                  'Metal','Chemical','photographic','Rubber','plastic','paper','products',
                                                   'Textile','fur','leather','wood','paper','Assemblers','drivers',
                                                   'Locomotive engine','Heavy truck'
                                                  },
    
    'Elementary Occupations':{'Cleaners','helpers','Domestic','hotel','Vehicle','window','laundry','Agricultural','merchandising',
                              'forestry','fishery','labourers','Agricultural','construction','manufacturing','supervisor'
                              'transport','storage','Food preparation','assistants','Street','vendors','elementary',
                              'labour','garden','gardener','janitor','husband','wife','mom','dad','grand','parent','healer',
                              'Warehouse'
                             },
    
    'Armed forces Occupations':{ 'armed','forces','Commissioned','police','guards','security','law enforcement','enforcement',
                                'body guards','personal security','policing','soldier','military','defence'
                               },
    
    'Sports & Fitness industry':{'martial arts','fight','player','wrestling','athlete','sport','sports','coach',
                                 'gym','trainer','fitness','calisthenics','skate','football','soccer','hockey','basketball',
                                 'racing','yoga','training','lifter','baseball','games','gamer','Tennis'
                                },
    
    
    'Religious':{ 'Jesus','religious','preach','word','god','religion','muslim','christian','catholic','priest',
                 'hindu','sikh','buddhist','jain','allah','pastor','karma','christ','waheguru','holy','Spiritual'
                },
    
    'Politicians':{ 'Greens','leader','politics','election','Liberal','conservative','democratic','republic','policy','political',
                   'politician','Candidate','president','counsel','public affairs','democracy','republic','association',
                   'Green Party','Federal'
                  },
    
    'Environmentalist':{ 'environment','ecofriendly','global warming','climate change','warming','change','protector',
                        'solar','renewables','recycling','weather','earth','environmentalist','future','traditional',
                        'nature','resources','waste','water','shortage','seasons','climate','Eco'
                       },
    
    'Social Activist':{'social','activist','justice','injustice','BlackLivesMatter','black','lives','matter','feminist','equality',
                       'life','liberty','mask','racism','care','human rights','animal rights','rights','progress','opinion',
                       'Indigenous','fascism','resistance','Representative','Loyalist','consequence','union','development',
                       'motivate','growth','community','volunteer','life','positive','nationalist','patriot','system','peace',
                       'culture','support','survivor','quarantine','COVID19','coronavirus','LGBT','gay','LGBTQ','trans','transgender',
                       'lesbian','bisexual','bi','socialist','humanist','charity','civic','victim','victimized','Journalist',
                       'Defender','public','Disability','nonprofit','ngo','non-profit','wearamask','stayhome','union','Canadian'
                      },
    
    'Influencers and Content Creators':{'youtube','instagram','social','media','followers','following','Creative','performing','artists',
                                        'sing','music','video','movie','photo','model','travel','musician','endorsements',
                                        'sponsor','Tweet','host','enthusiast','breath','blog','blogger','artist','pop','rock','speaker',
                                        'anchor','reporter','actor','actress','star','famous','adventuring','adventures','drummer',
                                        'guitar','piano','synthesizer','DJ','trolls'
                                       }
    
}
    
# load subcategories

sub_categories = dict((k.lower(),[item.lower() for item in v]) for k,v in sub_categories.items())

j = []

for k,v in sub_categories.items():
    j.append(pos_tag(v))
    
x = []
for d in j:
    for s in d:
        x.append(s[1])
        
x = set(x)

# use stemmer to read root word from given word


ps = PorterStemmer()

# process the job description.
def prepare_job_desc(desc):
    # tokenize description.
    tokens = word_tokenize(desc)
        
    # Parts of speech (POS) tag tokens.
    token_tag = pos_tag(tokens)
    
    # Only include some of the POS tags.
    include_tags = list(x)
    filtered_tokens = [tok for tok, tag in token_tag if tag in include_tags]
    
    # stem words.
    stemmed_tokens = [ps.stem(tok).lower() for tok in filtered_tokens]
    return set(stemmed_tokens)

# 

new_df['job_description_word_set'] = new_df['description'].map(prepare_job_desc)

# process the keywords
job_set = {}
job_dict = {}

for k,v in sub_categories.items():

    job_set[k] = set([ps.stem(tok) for tok in v]) # stem the keywords (since the job description is also stemmed.)
    job_dict[k] = {ps.stem(tok):tok for tok in v} # use this dictionary to revert the stemmed words back to the original.
    
# 
    
num_postings = len(new_df.index)

tool_data = []

for i in range(num_postings):
    
    fin =[]
    
    tool_list = []

    job_desc_set = new_df.iloc[i]['job_description_word_set']
    
    # check if the keywords are in the job description. Look for exact match by token.
    for k,v in job_set.items():
        v = [item.lower() for item in v]
        v = set(v)
        tool_words = v.intersection(job_desc_set)
        
        if len(tool_words) !=0:
            for i in (list(tool_words)):
                      fin.append(i)
            
        new = list(set(fin))

        
    if len(new)>0:
        for h in new:
            tool_list.append(h.lower())
            
    tool_data.append(
    {
        'tools':list(set(tool_list))
    }
    )

tool_df = pd.DataFrame(tool_data)

new_df = pd.merge(new_df,tool_df,right_index=True, left_index=True)

# function to read job details and skills

def myfun1(p):
    
    res =[]

    for k,v in job_dict.items():
        for k1,v1 in v.items():
            if len(p)==1:
                if p[0] == k1.lower():res.append(v[p[0]])
            elif len(p)>1:
                for z in p:
                    if z == v1.lower():res.append(v[z])
                        
    if len(res)>1:
        res = list(set(res))

    return res

# function to match subcategory to skills

def myfun2(p):
    
    res =[]
    
    for k,v in sub_categories.items():
        v = [item.lower() for item in v]
        if len(p)==1:
            if p[0] in list(v):res.append(k) 
        elif len(p)>1:
            for z in p:
                if z in list(v):res.append(k)
                    
    if len(res)>1:
        res = list(set(res))

    return res

new_df.tools = new_df.tools.apply(lambda x: myfun1(x))

new_df['category'] = new_df.tools.apply(lambda x: myfun2(x))

new_df.to_excel('follower_description.xlsx',index = False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
