In [18]:
import pandas as pd
from datetime import datetime
import pymongo
from pymongo import MongoClient
# Connect to MongoDB; no arguments are passed, so pymongo's default connection settings apply.
client = MongoClient()
# Database used by every Mongo call in this notebook.
db = client['stud28']
# Collection used as the persistent store: the functions below read and write company documents here.
companies = db.companies

Every time we use Redis, it is because we want fast retrieval: the operational side can quickly check whether a request is legal, and the user gets fast results.
Every time we use Mongo, it is because we want to keep the data in persistent storage; Mongo also serves as the backup in case Redis crashes, so that no information is lost.

# Mongo Document Structure (companies collection, one document per company)

In [19]:
# job_list and applications_list are implemented as sets (they are updated with $addToSet)
# applications_list is actually a set
#{'company_name':str, 
# 'company_description':str,
# 'job_list':({'job_id':int,
#               'job_name': str, 
#               'location': str,
#               'requirements':set,
#               'publish_date':date,
#               'status':str,
#               'applications_list':({'candidate_name':str,
#                                      'email':str,
#                                      'linkedin':str, 
#                                      'skills': set,
#                                      'Application Date':str  # format '%d-%m-%Y %H:%M:%S'
#                                      })
#              })
#}

# Data model explanation

In [20]:
#To give the operational side a quick response we keep some of the data in the cache.
#The cached data (Redis instances) should not hold redundant data, only the data that is necessary for the calculations.
#We split each of the functions into two parts: one that updates our cached data, and a second that updates our Mongo db.
#All the get operators read only from the cache.

#Most of the calls are at the (company_name, job_id) aggregation level, so throughout the exercise we use a combined key:
# combined_id = company_name+"_"+job_id

In [21]:
import redis
# Shared Redis client used by every cache helper in this notebook.
# NOTE(review): the host is hardcoded here. On Python 3, redis-py returns bytes unless
# decode_responses=True is passed, which would break the `== 'open'` / `== 'close'`
# comparisons made by the functions below - confirm before porting.
r = redis.StrictRedis(host='bdl1.eng.tau.ac.il', port=6379)

# redis data structure

In [22]:
#1. Candidates_Mails_Per_Position
#  structure: combined_id+"_mail_stud28": sorted set of candidate emails
#  supports a fast show_candidates operation; updated in add_application
#
#2. Jobs_Per_Location
#  structure: location+"_stud28": set of combined_ids
#  supports a fast show_number_of_jobs operation; updated in add_job
#  
#
#3. Requirement_Per_Position
#  combined_id+"_req_stud28" : set of requirements (allows an easy intersection with the candidate skill set)
#  when receiving an application we need to decide, on the operational side, how many requirements the candidate meets, so that we can return the sorted candidate list; for that we keep the requirements of
#  each job in the cache; updated in add_job
#  
#4. Status_Per_Position
#  combined_id+"_status_stud28": status
#  supports fast access to the status of each position, used by show_number_of_jobs
#
#5. Max_Job_ID_Per_Company
#  company_name+"_stud28": max_job_id
#  if the company exists but has not posted a job yet, max_job_id is set to 0; supports the prevention of duplicates in the add_company and add_job operators (the new id is the increment of the max)

# Operation 1 - add_company

In [23]:
def add_company(company):
    """Register a new company in the Redis cache and in MongoDB.

    company: dict with at least a 'company_name' key; the whole dict is the
    document handed to add_company_to_redis and add_company_to_mongo.

    Nothing is stored (only a message is printed) when the company already
    exists in the cache or when its name contains '_'. Always returns None.
    """
    name = company['company_name']
    # Max_Job_ID_Per_Company: the counter key doubles as the "company exists" marker
    if r.exists(name + "_stud28"):
        print("company name already exists")
        return
    # '_' is the separator of combined_id, so it is not allowed inside a company name
    if '_' in name:
        print("Please insert company name without _")
        return
    add_company_to_redis(company)
    add_company_to_mongo(company)
    
    
def add_company_to_redis(company):
    """Create the company's job-id counter in Redis, starting at 0.

    company: dict with a 'company_name' key. The counter key is
    company_name + "_stud28" (Max_Job_ID_Per_Company).
    Prints a confirmation containing the whole company dict; returns None.
    """
    #Max_Job_ID_Per_Company
    r.set(company['company_name']+"_stud28", 0)
    # print() with a single argument behaves the same on Python 2 and Python 3
    print('{} created'.format(company))
    

def add_company_to_mongo(company):
    """Insert the company dict as a new document in the companies collection."""
    mongo_collection = companies
    mongo_collection.insert_one(company)

# Operation 2 - add_job

In [24]:
def add_job(job, company_name):
    """Publish a new job for an existing company in Redis and MongoDB.

    job: dict describing the position; it is passed unchanged to
         add_job_to_redis and add_job_to_mongo.
    company_name: name of a company previously created with add_company.

    The job id is the company's counter after an atomic increment.
    Prints a message and stores nothing when the company is unknown to the
    cache. Returns None.
    """
    #Max_Job_ID_Per_Company
    counter_key = company_name+"_stud28"
    # INCR on a missing key would silently create a counter for a company that
    # was never added, so refuse instead of assuming the company exists.
    if not r.exists(counter_key):
        print("company {} does not exist, please add it first".format(company_name))
        return
    # Use the value returned by INCR itself: a separate GET could observe an
    # increment made by another client and hand out a duplicate job id.
    job_id = r.incr(counter_key)
    add_job_to_redis(job_id,job,company_name)
    add_job_to_mongo(job_id,job,company_name)
  
  
def add_job_to_redis(job_id,job,company_name):
    """Cache the data of one job that the operational side needs.

    job_id: id of the job inside the company (int or numeric string).
    job: dict with 'requirements', 'status', 'location' and 'job_name' keys.
    company_name: owning company; combined with job_id into combined_id.

    Writes three keys: the requirement set, the status string and the
    membership of the job in its location set. Prints a confirmation.
    Returns None.
    """
    combined_id = company_name+"_"+str(job_id)
    #Requirement_Per_Position
    requirements = job['requirements']
    if isinstance(requirements, (bytes, type(u''))):
        # a single requirement given as a plain string, not a collection
        requirements = [requirements]
    requirements = list(requirements)
    # Each requirement has to be its own set member; passing the list itself
    # stores (or rejects) it as one value, so no candidate skill ever matches.
    # SADD needs at least one member, hence the guard for an empty list.
    if requirements:
        r.sadd(combined_id+"_req_stud28", *requirements)
    #Status_Per_Position
    r.set(combined_id+"_status_stud28", job['status'])
    #Jobs_Per_Location
    r.sadd(job['location']+"_stud28",combined_id)
    print('{} added {} position, position id - {}'.format(company_name, job['job_name'], str(job_id)))
  
  
def add_job_to_mongo(job_id,job,company_name):
    """Append a job sub-document to the company's job_list in MongoDB.

    job_id: id of the job (stored as int).
    job: dict with 'job_name', 'location', 'requirements', 'publish_date'
         and 'status' keys.
    company_name: value of the 'company_name' field of the target document.

    The job starts with an empty applications_list and the document's
    lastModified field is refreshed. Returns None.
    """
    requirements = job['requirements']
    if isinstance(requirements, (set, frozenset)):
        # BSON has no set type; a set would make the update fail after the
        # Redis side of add_job has already been written.
        requirements = list(requirements)
    job_details = {'job_id':int(job_id),
                  'job_name': job['job_name'],
                  'location':job['location'],
                  'requirements': requirements,
                  'publish_date': job['publish_date'],
                  'status': job['status'],
                  'applications_list':[]}
    companies.update_one({"company_name": company_name},
                         {
                             "$addToSet": {"job_list": job_details},
                             "$currentDate": {"lastModified": True}
                         })
  
  


# Operation 4 - update_job_status

In [25]:
def update_job_status(company_name, job_id, new_status):
    """Set the status of a job in the Redis cache and then in MongoDB.

    All three arguments are forwarded unchanged to update_job_status_redis
    and update_job_status_mongo, in that order. Returns None.
    """
    # cache first, persistent store second
    for apply_status in (update_job_status_redis, update_job_status_mongo):
        apply_status(company_name, job_id, new_status)


def update_job_status_redis(company_name, job_id, new_status):
    """Overwrite the cached status of one job and print a confirmation.

    The key is company_name + "_" + str(job_id) + "_status_stud28"
    (Status_Per_Position). Returns None.
    """
    combined_id = company_name+"_"+str(job_id)
    #Status_Per_Position
    r.set(combined_id+"_status_stud28", new_status)
    # print() with a single argument behaves the same on Python 2 and Python 3
    print('job_id {} in {} is now {}'.format(job_id, company_name, new_status))
  
  

def update_job_status_mongo(company_name, job_id, new_status):
    """Set the status of one job inside its company document in MongoDB.

    The job is selected by company_name and int(job_id); the positional
    operator updates the matched job_list element, and lastModified is
    refreshed. Returns None.
    """
    job_selector = {"company_name": company_name,
                    "job_list.job_id": int(job_id)}
    status_change = {"$set": {"job_list.$.status": new_status},
                     "$currentDate": {"lastModified": True}}
    companies.update_one(job_selector, status_change)

# Operation 3  - add_application

In [26]:
def add_application_(candidate, job_id, company_name):
    """Record a candidate's application for a job if the position accepts it.

    candidate: dict describing the applicant; forwarded unchanged to
               add_application_to_redis and add_application_to_mongo.
    job_id, company_name: identify the position (combined into combined_id).

    The cached status decides: an unknown position or a position whose
    status is 'close' only prints a message; otherwise the application is
    written to Redis and then to MongoDB. Returns None.
    """
    combined_id = company_name+"_"+str(job_id)
    #Status_Per_Position
    position_status = r.get(combined_id+"_status_stud28")
    if position_status is None:
        # No cached status means the job was never added: accepting the
        # application would leave an orphan candidate set in Redis while the
        # Mongo update matches nothing.
        print("Sorry, position id {} in {} was not found".format(job_id, company_name))
        return
    if position_status=='close':
        print ("Sorry, the position is close")
        return
    add_application_to_redis(candidate, combined_id)
    add_application_to_mongo(candidate, job_id,company_name)


def add_application_to_redis(candidate, combined_id):
    """Score a candidate against the cached job requirements and rank them.

    candidate: dict with 'skills' (iterable) and 'email' keys.
    combined_id: company_name + "_" + job_id of the position.

    The score is the number of the candidate's skills found in the job's
    requirement set; the email is added with that score to the position's
    sorted set. Prints a confirmation. Returns None.
    """
    #Requirement_Per_Position
    requirements = r.smembers(combined_id+"_req_stud28")
    candidate_skills = set(candidate['skills'])
    number_of_matches = len(candidate_skills.intersection(requirements))
    #Candidates_Mails_Per_Position
    # NOTE(review): positional (name, score, member) is the redis-py 2.x
    # StrictRedis.zadd form; redis-py >= 3.0 expects a {member: score}
    # mapping instead - confirm the installed version before upgrading.
    r.zadd(combined_id+"_mail_stud28", number_of_matches, candidate['email'])
    # The job id is the last '_'-separated part; splitting from the right keeps
    # the message correct even if a company name contains '_'.
    company_name, job_id = combined_id.rsplit('_', 1)
    print('Application for postion id {} in {} recieved'.format(job_id, company_name))
    
    
def add_application_to_mongo(candidate, job_id,company_name):
    """Add the candidate dict to the job's applications_list in MongoDB.

    The job is selected by company_name and int(job_id); the candidate is
    added with $addToSet to the matched job_list element and lastModified is
    refreshed. Returns None.
    """
    job_selector = {"company_name": company_name,
                    "job_list.job_id": int(job_id)}
    new_application = {"$addToSet": {"job_list.$.applications_list": candidate},
                       "$currentDate": {"lastModified": True}}
    companies.update_one(job_selector, new_application)

# Operation 5 - show_number_of_jobs

In [27]:
def show_number_of_jobs(location):
    """Print and return the number of open positions cached for a location.

    location: location string as given in the job dict; the ids of its jobs
    are read from the set location + "_stud28" (Jobs_Per_Location) and a job
    counts when its cached status equals 'open'.
    """
    combined_ids = list(r.smembers(location+"_stud28"))
    if combined_ids:
        # one MGET round trip instead of one GET per job
        statuses = r.mget([c_id+"_status_stud28" for c_id in combined_ids])
    else:
        # MGET needs at least one key
        statuses = []
    open_positions = sum(1 for status in statuses if status=='open')
    print("number of posistion in {}: {}".format(location, open_positions))
    return open_positions

# Operation 6 - show_candidates

In [28]:
def show_candidates(company_name, job_id):
    """Print and return the candidate emails of a job, best match first.

    The emails are read from the sorted set
    company_name + "_" + str(job_id) + "_mail_stud28"
    (Candidates_Mails_Per_Position) in descending score order.
    Returns the list of members (empty when nobody applied).
    """
    combined_id = company_name+"_"+str(job_id)
    # Query once and reuse the result. The previous bare except only wrapped
    # the print, so any Redis error still escaped from the second query.
    candidates = r.zrevrange(combined_id+"_mail_stud28", 0, -1)
    if candidates:
        print("sorted set of candidates for job {} in {}: {}".format(job_id,company_name, candidates))
    else:
        print("candidates for job {} in {} were not found".format(job_id,company_name))
    return candidates

# Report 1 - count_jobs_by_company

In [29]:
def count_jobs_by_company():
    """Display a table with the number of posted jobs of every company.

    Runs a $project aggregation on the companies collection that yields
    'company_name' and 'num_posted_jobs' (the size of job_list, or "NA" when
    the document has no job_list array) and displays it as a DataFrame.
    Returns None.
    """
    aggregation_function = [
        {
            "$project": {
                "company_name": 1,
                "num_posted_jobs": { "$cond": { "if": { "$isArray": "$job_list" }, "then": { "$size": "$job_list" }, "else": "NA"} }
            }
        }
    ]
    report_columns = ['company_name','num_posted_jobs']
    agg_result = db.companies.aggregate(aggregation_function)
    # Naming the columns up front keeps the report well-formed (an empty table)
    # when the collection has no documents, instead of failing on selection.
    df = pd.DataFrame(list(agg_result), columns=report_columns)
    display(df)

# Report 2 - count_candidates_by_job

In [30]:
def count_candidates_by_job():
    """Display, per job, the number of distinct recent applicants.

    For every job of every company document, counts the distinct emails of
    applications whose 'Application Date' (string, '%d-%m-%Y %H:%M:%S') lies
    at most 30 whole days before now, and displays a DataFrame with the
    columns company_name, job_id and recent_candidate. Returns None.
    """
    curr_date = datetime.now()
    tup_list =[]
    for doc in companies.find():
        # A company that has not posted a job yet has no job_list field at all.
        for job in doc.get('job_list', []):
            recent_emails = set()
            for app in job.get('applications_list', []):
                applied_at = datetime.strptime(app['Application Date'], '%d-%m-%Y %H:%M:%S')
                if (curr_date - applied_at).days<=30:
                    recent_emails.add(app['email'])
            tup_list.append((doc["company_name"], job['job_id'], len(recent_emails)))
    df = pd.DataFrame(tup_list, columns = ['company_name','job_id','recent_candidate'])
    display(df)

# Recovery

In [31]:
def recovery():
    """Rebuild the Redis cache of this notebook from the MongoDB documents.

    Deletes every Redis key containing 'stud28', then for each company
    document re-creates the company counter, every job (requirements, status,
    location membership) and every application through the same helpers used
    by the online operations. Finally the company counter is set to the
    highest restored job id. Returns None.
    """
    # delete all the leftovers (server-side pattern match, one DEL call)
    stale_keys = r.keys('*stud28*')
    if stale_keys:
        r.delete(*stale_keys)
    print('redis is ready to restore')
    # restore the data from mongo
    for company in companies.find():
        #add company
        company_name = company['company_name']
        add_company_to_redis(company)
        #add jobs
        jobs = company.get('job_list') or []
        if not jobs:
            print('There is no jobs for company: {}'.format(company_name))
        max_job_id = 0
        for job in jobs:
            job_id = job['job_id']
            combined_id = company_name+"_"+str(job_id)
            add_job_to_redis(job_id,job,company_name)
            max_job_id = max(max_job_id, int(job_id))
            #add applications
            applications = job.get('applications_list') or []
            if not applications:
                print('There is no applications for job: {}'.format(combined_id))
            for candidate in applications:
                add_application_to_redis(candidate, combined_id)
        #Max_Job_ID_Per_Company
        # add_company_to_redis resets the counter to 0; without this the next
        # add_job would hand out job id 1 again and collide with a restored job.
        if max_job_id:
            r.set(company_name+"_stud28", max_job_id)
        # reported once per company, inside the loop
        print('Finish to restore data of company: {}'.format(company_name))

In [33]:
def execute():
    """Run the demo scenario end to end.

    Creates the company 'TAU', posts one job, submits one application,
    closes and re-opens the job, runs both cache queries and both Mongo
    reports, and finally rebuilds the cache with recovery().
    """
    demo_company = {'company_name':'TAU', 'company_description':'University'}
    demo_job = {'job_name':'bi developer', 'location': 'Tel Aviv','requirements':['python','big data','mongodb'],'status':'open','publish_date':'01-02-2019'}
    demo_candidate = {'candidate_name':'laura', 'email':'laura@gmail.com','linkedin':'https://www.linkedin.com/in/laura/', 'skills': ['python','sql'],'Application Date':'01-02-2019 15:00:00'}

    # write path
    add_company(demo_company)
    add_job(demo_job, 'TAU')
    add_application_(demo_candidate, '1', 'TAU')
    for status in ('close', 'open'):
        update_job_status('TAU', '1', status)

    # read path (cache), then reports (mongo)
    show_number_of_jobs('Tel Aviv')
    show_candidates('TAU', '1')
    count_jobs_by_company()
    count_candidates_by_job()

    # rebuild the cache from mongo
    recovery()
 

In [34]:
# Run the demo scenario defined in execute() using the Redis and Mongo clients configured above.
execute()

{'company_name': 'TAU', 'company_description': 'University'} created
TAU added bi developer position, position id - 1
Application for postion id 1 in TAU recieved
job_id 1 in TAU is now close
job_id 1 in TAU is now open
number of posistion in Tel Aviv: 1
sorted set of candidates for job 1 in TAU: ['laura@gmail.com']


Unnamed: 0,company_name,num_posted_jobs
0,TAU,1


Unnamed: 0,company_name,job_id,recent_candidate
0,TAU,1,0


redis is ready to restore
{u'lastModified': datetime.datetime(2020, 5, 25, 15, 3, 6, 754000), u'_id': ObjectId('5ecbde2a8ec1437b13b69916'), u'job_list': [{u'status': u'open', u'applications_list': [{u'skills': [u'python', u'sql'], u'candidate_name': u'laura', u'Application Date': u'01-02-2019 15:00:00', u'email': u'laura@gmail.com', u'linkedin': u'https://www.linkedin.com/in/laura/'}], u'requirements': [u'python', u'big data', u'mongodb'], u'job_id': 1, u'publish_date': u'01-02-2019', u'location': u'Tel Aviv', u'job_name': u'bi developer'}], u'company_name': u'TAU', u'company_description': u'University'} created
TAU added bi developer position, position id - 1
Application for postion id 1 in TAU recieved
Finish to restore data of company: TAU


In [16]:
# Show the raw Mongo document stored for the demo company.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(companies.find_one({"company_name": 'TAU'}))

{u'lastModified': datetime.datetime(2020, 5, 25, 13, 11, 33, 978000), u'_id': ObjectId('5ecbc4058ec143732464a799'), u'job_list': [{u'status': u'open', u'applications_list': [{u'skills': [u'python', u'sql'], u'candidate_name': u'laura', u'Application Date': u'01-02-2019 15:00:00', u'email': u'laura@gmail.com', u'linkedin': u'https://www.linkedin.com/in/laura/'}], u'requirements': [u'python', u'big data', u'mongodb'], u'job_id': 1, u'publish_date': u'01-02-2019', u'location': u'Tel Aviv', u'job_name': u'bi developer'}], u'company_name': u'TAU', u'company_description': u'University'}


In [None]:
# Cleanup: remove every Redis key of this notebook and drop the Mongo collection.
# The pattern is matched on the server and the keys are removed with one DEL call,
# instead of pulling every key of the server to the client and deleting one by one.
stale_keys = r.keys('*stud28*')
if stale_keys:
    r.delete(*stale_keys)
db.companies.drop()