In [543]:
import numpy as np
import pandas as pd
from datetime import datetime

In [452]:
data = pd.read_csv('SalesForce_Contact.csv', encoding='latin', low_memory=False)
data.shape

(132445, 391)

# General Methods

In [453]:
def count_value_null(feature, data):
    #print("Feature = ", feature)
    print("Number of records:", len(data[feature].index))
    print("Null Values:", data[feature].isna().sum())
    print("Null Values %:", data[feature].isna().sum()/len(data.index)*100)
    print("Unique Values:", data[feature].nunique())
    print("Top 10 Value Counts:")
    print(data[feature].value_counts().head(10))
    print("\n")

# Seperate Contacts into Volunteers and Clients

In [454]:
count_value_null('Volunteer__c', data)
count_value_null('Client__c', data)

Number of records: 132445
Null Values: 0
Null Values %: 0.0
Unique Values: 2
Top 10 Value Counts:
0    130070
1      2375
Name: Volunteer__c, dtype: int64


Number of records: 132445
Null Values: 0
Null Values %: 0.0
Unique Values: 2
Top 10 Value Counts:
1    105744
0     26701
Name: Client__c, dtype: int64




In [455]:
#Contacts that are neither Clients nor Volunteers (Who are they?)
data[(data['Client__c']== 0) & (data['Volunteer__c']== 0)].shape

(24484, 391)

In [456]:
#Contacts that are either Clients or Volunteers
data[(data['Client__c']== 1) & (data['Volunteer__c']== 1)].shape

(158, 391)

In [457]:
volunteer = data[(data['Volunteer__c']==1)]
volunteer.shape

(2375, 391)

In [458]:
client = data[(data['Client__c']==1)]
client.shape

(105744, 391)

In [708]:
confirmed_hire = data[(data['Hire_Heroes_USA_Confirmed_Hire__c']==1)]
confirmed_hire.shape

(30350, 391)

In [709]:
non_confirmed_hire = data[(data['Hire_Heroes_USA_Confirmed_Hire__c']==0)]
non_confirmed_hire.shape

(102095, 391)

# Volunteers

Examine if case owners and volunteers are the same

In [460]:
count_value_null('OwnerId', client)

Number of records: 105744
Null Values: 0
Null Values %: 0.0
Unique Values: 196
Top 10 Value Counts:
00550000000ztRtAAI    30267
00550000001YDwUAAW     3049
00550000001zVqjAAE     2491
00550000001Vw8JAAS     1400
005500000042X0TAAU     1304
00538000004lJWlAAM     1244
00538000004SivDAAS     1221
005500000042GlIAAU     1200
00538000004kgf9AAA     1199
00538000004SiwfAAC     1177
Name: OwnerId, dtype: int64




In [461]:
count_value_null('OwnerId', volunteer)

Number of records: 2375
Null Values: 0
Null Values %: 0.0
Unique Values: 99
Top 10 Value Counts:
00550000001WkUeAAK    1496
00538000004kmwWAAQ     243
00550000000xADhAAM     180
005380000062eOzAAI     170
00538000004tArfAAE      87
00538000004gsNsAAI      10
00550000000z4FcAAI      10
00550000000ztRtAAI       7
00550000001XN4zAAG       7
00550000001YDwUAAW       7
Name: OwnerId, dtype: int64




In [462]:
data[data['Id'] == "00550000000ztRtAAI"]

Unnamed: 0,Id,AccountId,RecordTypeId,MailingState,MailingPostalCode,MailingCountry,LeadSource,OwnerId,HasOptedOutOfEmail,HasOptedOutOfFax,...,Send_Green_Survey__c,Professional_Certification__c,TS_Referral_Requested_By__c,RealZip__RealZip__c,Discharge_Type__c,Discharge_Disposition__c,Date_Turned_Black__c,Litmos__Litmos_Login_Access__c,Litmos__Total_Sum_Percentages__c,Willing_to_Relocate_to_High_Risk_Area__c


In [463]:
ownerId = data['OwnerId'].unique()
count = 0
for i in ownerId:
    if (i in data["Id"]):
        count = count + 1
count

0

Observations: Both Client and Volunteer has OwnerId. Owner itself doesn't have a record in the Contacts (which include Volunteers and Clients). Owners might be diffrent from Volunteers. We want to assess performance of Case Owners but not Volunteers.

# Owners

Check if the owner of a case actually completed the assessment and resume for that case

In [464]:
print(confirmed_hire.shape)
result3 = confirmed_hire[(confirmed_hire['OwnerId']== confirmed_hire['Assessment_Completed_By__c']) & (confirmed_hire['OwnerId']== confirmed_hire['Resume_Completed_By__c'])]
print(result3.shape)

(30350, 391)
(22101, 391)


In [465]:
result4 = confirmed_hire[(confirmed_hire['Resume_Completed_By__c']== confirmed_hire['Assessment_Completed_By__c'])]
result4.shape

(28923, 391)

In [466]:
result5 = confirmed_hire[(confirmed_hire['Resume_Completed_By__c']!= confirmed_hire['Assessment_Completed_By__c'])]
result5.shape
result5[['OwnerId','Resume_Completed_By__c','Assessment_Completed_By__c']]

Unnamed: 0,OwnerId,Resume_Completed_By__c,Assessment_Completed_By__c
656,00538000005U4bgAAC,,00538000005U4bgAAC
864,00550000002ME8BAAW,00550000002ME8BAAW,
2226,00550000001zjLpAAI,00550000001zjLpAAI,
2287,005380000063Mg0AAE,,005380000063Mg0AAE
3606,0050z0000079dpsAAA,00538000004SivXAAS,005500000043hc0AAA
3749,005380000063Mg0AAE,,005380000063Mg0AAE
6052,005500000042GlIAAU,005500000042GlIAAU,00538000005SdCRAA0
6254,0050z0000079ElgAAE,00538000004kCkJAAU,00550000000ztRtAAI
9318,00550000001Vw8JAAS,,00550000001Vw8JAAS
11664,00550000001YDwUAAW,00550000001ZAiMAAW,


In [467]:
result7 = confirmed_hire[(confirmed_hire['OwnerId']!= confirmed_hire['Assessment_Completed_By__c']) & (confirmed_hire['OwnerId'] != confirmed_hire['Resume_Completed_By__c'])]
result5.shape

(1427, 391)

In [468]:
count_value_null('Assessment_Completed_By__c', confirmed_hire)

Number of records: 30350
Null Values: 708
Null Values %: 2.3327841845140034
Unique Values: 166
Top 10 Value Counts:
00550000001zVqjAAE    813
00550000001YDwUAAW    693
005500000042TrsAAE    673
00550000002dthaAAA    575
005500000042c6HAAQ    553
00550000003HPPXAA4    517
00538000004lAqgAAE    507
00550000001Vw8JAAS    507
00538000004sKV8AAM    492
005500000042GlIAAU    491
Name: Assessment_Completed_By__c, dtype: int64




In [469]:
count_value_null('Resume_Completed_By__c', confirmed_hire)

Number of records: 30350
Null Values: 653
Null Values %: 2.1515650741350907
Unique Values: 166
Top 10 Value Counts:
00550000001zVqjAAE    829
00550000001YDwUAAW    716
005500000042TrsAAE    674
00550000002dthaAAA    599
005500000042c6HAAQ    550
00550000003HPPXAA4    515
005500000042GlIAAU    506
00538000004lAqgAAE    504
00550000001Vw8JAAS    501
00538000004sKV8AAM    496
Name: Resume_Completed_By__c, dtype: int64




In [470]:
count_value_null('OwnerId', confirmed_hire)

Number of records: 30350
Null Values: 0
Null Values %: 0.0
Unique Values: 178
Top 10 Value Counts:
00550000001zVqjAAE    976
00550000001YDwUAAW    911
005500000042TrsAAE    696
00538000004sKV8AAM    667
005500000042GlIAAU    610
00538000004lAqgAAE    595
00538000004SivDAAS    578
00538000004kgf9AAA    562
005500000042c6HAAQ    557
005500000042X0TAAU    557
Name: OwnerId, dtype: int64




In [471]:
confirmed_hire = confirmed_hire.dropna(subset=['Assessment_Completed_By__c', 'Resume_Completed_By__c'], how='all')
confirmed_hire.shape

(29938, 391)

In [742]:
confirmed_hire['Case_Owner'] = confirmed_hire['OwnerId']
confirmed_hire.loc[confirmed_hire['Resume_Completed_By__c'] == confirmed_hire['Assessment_Completed_By__c'], 'Case_Owner'] = confirmed_hire['Resume_Completed_By__c']
confirmed_hire[['Case_Owner', 'OwnerId','Resume_Completed_By__c','Assessment_Completed_By__c']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Unnamed: 0,Case_Owner,OwnerId,Resume_Completed_By__c,Assessment_Completed_By__c
4,00538000005F2ADAA0,00538000005F2ADAA0,00538000005F2ADAA0,00538000005F2ADAA0
12,005380000063Mf7AAE,005380000063Mf7AAE,005380000063Mf7AAE,005380000063Mf7AAE
32,00538000004sKV8AAM,00538000004sKV8AAM,00538000004sKV8AAM,00538000004sKV8AAM
40,005380000063MfvAAE,005380000063MfvAAE,005380000063MfvAAE,005380000063MfvAAE
47,00538000004wgQWAAY,00538000004wgQWAAY,00538000004wgQWAAY,00538000004wgQWAAY
54,005380000063fCOAAY,005380000063fCOAAY,005380000063fCOAAY,005380000063fCOAAY
66,00538000005TGOlAAO,00538000005TGOlAAO,00538000005TGOlAAO,00538000005TGOlAAO
71,00538000005ZA5kAAG,00538000005ZA5kAAG,00538000005ZA5kAAG,00538000005ZA5kAAG
72,00538000004sKV8AAM,00538000004sKV8AAM,00538000004sKV8AAM,00538000004sKV8AAM
74,00538000004wgQWAAY,00538000004wgQWAAY,00538000004wgQWAAY,00538000004wgQWAAY


In [473]:
count_value_null('Case_Owner', confirmed_hire)

Number of records: 29938
Null Values: 0
Null Values %: 0.0
Unique Values: 170
Top 10 Value Counts:
00550000001zVqjAAE    835
00550000001YDwUAAW    756
005500000042TrsAAE    678
00550000002dthaAAA    605
005500000042c6HAAQ    553
00550000003HPPXAA4    519
00550000001Vw8JAAS    514
005500000042GlIAAU    511
00538000004lAqgAAE    509
00538000004sKV8AAM    496
Name: Case_Owner, dtype: int64




# Confirmed Hire

In [711]:
# How many times Staff member has attempted to reach contact by phone (successful AND unsuccessful)
count_value_null('ringdna100__Call_Attempts__c', confirmed_hire)
count_value_null('ringdna100__Call_Attempts__c', non_confirmed_hire)

Number of records: 30350
Null Values: 19581
Null Values %: 64.51729818780889
Unique Values: 27
Top 10 Value Counts:
1.0     1636
2.0     1468
3.0     1450
4.0     1414
5.0     1213
6.0      997
7.0      725
8.0      518
9.0      442
10.0     282
Name: ringdna100__Call_Attempts__c, dtype: int64


Number of records: 102095
Null Values: 79804
Null Values %: 78.16641363436015
Unique Values: 31
Top 10 Value Counts:
1.0     4865
3.0     4054
2.0     2603
4.0     2560
5.0     1798
6.0     1468
7.0     1149
8.0      915
9.0      702
10.0     585
Name: ringdna100__Call_Attempts__c, dtype: int64




In [712]:
count_value_null('ringdna100__Email_Attempts__c', confirmed_hire)
count_value_null('ringdna100__Email_Attempts__c', non_confirmed_hire)

Number of records: 30350
Null Values: 18949
Null Values %: 62.43492586490939
Unique Values: 58
Top 10 Value Counts:
1.0     1682
2.0     1239
3.0     1156
4.0     1072
5.0      934
6.0      831
7.0      681
8.0      624
9.0      529
10.0     457
Name: ringdna100__Email_Attempts__c, dtype: int64


Number of records: 102095
Null Values: 76413
Null Values %: 74.84499730643029
Unique Values: 103
Top 10 Value Counts:
1.0     5246
3.0     4300
2.0     2765
4.0     2351
5.0     1768
6.0     1613
7.0     1216
8.0      997
9.0      863
10.0     742
Name: ringdna100__Email_Attempts__c, dtype: int64




In [477]:
count_value_null('Employment_Barriers__c', confirmed_hire)
# a lot of nulls, cannot use

Number of records: 29938
Null Values: 29712
Null Values %: 99.24510655354399
Unique Values: 32
Top 10 Value Counts:
None                                                153
Lack of Experience                                    9
Education                                             8
License and certification requirements                8
Medical/Physical/Mental Limitations                   5
Gap(s) in Employment                                  5
Education;License and certification requirements      4
Career Change                                         3
Relocation(s)                                         3
Security clearance expired or not relevant            3
Name: Employment_Barriers__c, dtype: int64




In [714]:
confirmed_hire['ringdna100__Email_Attempts__c'][confirmed_hire['ringdna100__Email_Attempts__c']> 10.0].value_counts()

11.0     374
12.0     317
13.0     235
14.0     217
15.0     191
16.0     138
17.0     114
18.0     102
19.0      92
20.0      71
21.0      50
23.0      43
22.0      33
24.0      31
26.0      25
25.0      20
28.0      19
27.0      17
29.0      14
31.0      13
30.0      13
33.0       9
32.0       7
34.0       5
35.0       4
37.0       4
40.0       4
44.0       3
47.0       3
42.0       3
53.0       2
38.0       2
36.0       2
39.0       2
58.0       2
51.0       2
43.0       2
173.0      1
55.0       1
63.0       1
100.0      1
105.0      1
50.0       1
56.0       1
92.0       1
54.0       1
104.0      1
95.0       1
Name: ringdna100__Email_Attempts__c, dtype: int64

In [715]:
# How many times Staff member has attempted to reach contact by phone (successful AND unsuccessful)
confirmed_hire['ringdna100__Call_Attempts__c'][confirmed_hire['ringdna100__Call_Attempts__c']>10.0].value_counts()

11.0    195
12.0    121
13.0     96
14.0     73
15.0     49
16.0     29
17.0     21
18.0     13
20.0      9
19.0      6
21.0      3
23.0      3
22.0      2
29.0      1
26.0      1
25.0      1
30.0      1
Name: ringdna100__Call_Attempts__c, dtype: int64

In [539]:
count_value_null('Date_turned_grey__c', confirmed_hire)

Number of records: 29938
Null Values: 26075
Null Values %: 87.09666644398423
Unique Values: 1109
Top 10 Value Counts:
7/27/2017 0:00    128
6/30/2017 0:00     22
6/2/2017 0:00      20
7/12/2017 0:00     18
6/10/2016 0:00     17
7/14/2017 0:00     17
7/25/2017 0:00     15
4/19/2017 0:00     15
7/7/2017 0:00      15
6/27/2017 0:00     15
Name: Date_turned_grey__c, dtype: int64




In [542]:
temp = confirmed_hire[['CreatedDate','Date_turned_grey__c', 'Date_Turned_Black__c','Date_turned_green__c','Date_Submitted_for_Hire__c']]
temp = temp.dropna(subset=['Date_turned_grey__c', 'Date_Turned_Black__c'], how='all')
temp


Unnamed: 0,CreatedDate,Date_turned_grey__c,Date_Turned_Black__c,Date_turned_green__c,Date_Submitted_for_Hire__c
4,8/5/2018 15:41,8/16/2018 0:00,8/9/2018 0:00,8/16/2018 0:00,
80,8/6/2018 23:29,10/9/2018 0:00,,8/15/2018 0:00,
124,8/7/2018 18:52,11/14/2018 0:00,,8/15/2018 0:00,
128,8/7/2018 20:19,,8/16/2018 0:00,8/30/2018 0:00,
651,8/14/2018 14:21,8/20/2018 0:00,,8/23/2018 0:00,
749,8/15/2018 0:10,12/12/2018 0:00,,8/20/2018 0:00,
934,8/15/2018 22:51,,9/13/2018 0:00,9/19/2018 0:00,
1015,8/16/2018 15:54,8/24/2018 0:00,,9/7/2018 0:00,
1178,8/18/2018 21:28,9/6/2018 0:00,9/6/2018 0:00,9/6/2018 0:00,
1246,8/20/2018 13:47,,8/30/2018 0:00,9/6/2018 0:00,


In [730]:
confirmed_hire['Days_To_Grey'] = int()
from datetime import datetime
for index, row in confirmed_hire.iterrows():
    if pd.isnull(row['Date_turned_grey__c']) == False:
        date_format = "%m/%d/%Y %H:%M"
        a = datetime.strptime(str(row['CreatedDate']), date_format)
        b = datetime.strptime(str(row['Date_turned_grey__c']), date_format)
        confirmed_hire.at[index,'Days_To_Grey']=(b-a).days

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [731]:
confirmed_hire['Days_To_Grey'].value_counts()

0       26460
9         120
10        109
7         104
13        101
12         95
6          91
8          90
11         88
14         60
16         57
17         54
15         52
19         44
5          42
20         40
3          39
4          35
21         34
18         34
2          24
22         23
23         23
24         21
26         18
25         17
29         15
27         15
28         15
40         12
        ...  
680         1
568         1
519         1
472         1
599         1
631         1
663         1
679         1
743         1
759         1
775         1
823         1
887         1
903         1
999         1
1031        1
1631        1
1175        1
1287        1
1351        1
1415        1
1511        1
1575        1
1879        1
2105        1
72          1
88          1
248         1
456         1
1111        1
Name: Days_To_Grey, Length: 860, dtype: int64

In [734]:
confirmed_hire['Days_To_Black'] = int()
from datetime import datetime
for index, row in confirmed_hire.iterrows():
    if pd.isnull(row['Date_Turned_Black__c']) == False:
        date_format = "%m/%d/%Y %H:%M"
        a = datetime.strptime(str(row['CreatedDate']), date_format)
        b = datetime.strptime(str(row['Date_Turned_Black__c']), date_format)
        confirmed_hire.at[index,'Days_To_Black']=(b-a).days

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [735]:
confirmed_hire['Days_To_Black'].value_counts()

0       30212
7          20
5          11
9          11
4          10
6           9
12          7
10          6
8           6
11          4
2           4
14          3
15          3
3           3
27          3
20          3
23          2
17          2
13          2
21          2
76          2
35          1
256         1
19          1
658         1
18          1
564         1
16          1
1283        1
127         1
69          1
1110        1
1270        1
216         1
25          1
42          1
90          1
778         1
91          1
28          1
188         1
860         1
813         1
1661        1
926         1
359         1
Name: Days_To_Black, dtype: int64

In [646]:
confirmed_hire['Days_To_Hire'] = int()
from datetime import datetime
for index, row in confirmed_hire.iterrows():
    if pd.isnull(row['Date_Submitted_for_Hire__c']) == False:
        date_format = "%m/%d/%Y %H:%M"
        a = datetime.strptime(str(row['CreatedDate']), date_format)
        b = datetime.strptime(str(row['Date_Submitted_for_Hire__c']), date_format)
        confirmed_hire.at[index,'Days_To_Hire']=(b-a).days

In [647]:
confirmed_hire['Days_To_Hire'].value_counts()
#confirmed_hire['Days_To_Hire'].mean()

0       6994
76       110
111      106
69       106
34       103
55       102
97       101
84       100
103      100
90        99
98        99
50        97
56        96
118       96
77        96
126       95
82        93
124       93
133       91
83        91
139       91
73        90
70        90
63        90
71        90
49        89
41        88
105       88
101       87
68        87
        ... 
1862       1
1590       1
795        1
1462       1
1334       1
1302       1
1190       1
1094       1
1678       1
1149       1
1250       1
2094       1
1314       1
1330       1
1394       1
1458       1
1474       1
1490       1
1053       1
1069       1
1634       1
1794       1
1970       1
1085       1
955        1
2382       1
907        1
1133       1
2190       1
1887       1
Name: Days_To_Hire, Length: 1243, dtype: int64

In [716]:
print(confirmed_hire["Gender__c"].value_counts())
print(non_confirmed_hire["Gender__c"].value_counts())
print(client["Gender__c"].value_counts())

Male      22058
Female     5588
Name: Gender__c, dtype: int64
Male        30702
Female      10927
--None--       11
Name: Gender__c, dtype: int64
Male      51515
Female    15429
Name: Gender__c, dtype: int64


In [673]:
count_value_null('Months_Unemployed__c', confirmed_hire)

Number of records: 29938
Null Values: 24148
Null Values %: 80.66003073017569
Unique Values: 25
Top 10 Value Counts:
0.0     2376
1.0      736
2.0      646
3.0      503
4.0      352
6.0      247
5.0      225
12.0     165
8.0      122
7.0      110
Name: Months_Unemployed__c, dtype: int64




In [674]:
count_value_null('Date_of_Service_Entry__c', confirmed_hire)

Number of records: 29938
Null Values: 25302
Null Values %: 84.51466363818558
Unique Values: 3493
Top 10 Value Counts:
1998    12
2004    10
2008    10
2010     9
2007     9
2006     8
2003     8
2005     7
1996     7
1990     7
Name: Date_of_Service_Entry__c, dtype: int64




In [675]:
count_value_null('Date_of_Separation__c', confirmed_hire)

Number of records: 29938
Null Values: 24884
Null Values %: 83.11844478589084
Unique Values: 2895
Top 10 Value Counts:
7/31/2014     28
9/30/2014     23
9/30/2013     23
8/31/2014     23
Unknown       23
10/31/2013    21
TBD           20
UNK           18
8/1/2014      18
12/31/2012    18
Name: Date_of_Separation__c, dtype: int64




# Calculate Responsive Points

In [717]:
confirmed_hire['Responsive_Point'] = 0.0
count_value_null('Responsive_Point', confirmed_hire)

Number of records: 30350
Null Values: 0
Null Values %: 0.0
Unique Values: 1
Top 10 Value Counts:
0.0    30350
Name: Responsive_Point, dtype: int64




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Create function to calculate responsive points for confirmed hires from actions

In [718]:
def update_responsive_points(feature, point):
    for index, rowData in confirmed_hire.iterrows():
        if rowData[feature] == 1.0:
            confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + point
    print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

In [719]:
#True / False (indicates O2O coordinator has had first contact with client)
update_responsive_points('O2O_Initial_Assessment_Complete__c', 0.5)  

0.0    30010
0.5      340
Name: Responsive_Point, dtype: int64


In [720]:
# True / False (indicates new resume created / revised)
update_responsive_points('Finalized_HHUSA_revised_resume_on_file__c', 0.5)    

0.0      281
0.5    29729
1.0      340
Name: Responsive_Point, dtype: int64


In [721]:
# True / False (indicates client has created a profile on HHUSA job board)
update_responsive_points('On_Job_Board__c', 1)           

0.0      279
0.5    26005
1.0      192
1.5     3724
2.0      150
Name: Responsive_Point, dtype: int64


In [722]:
# Client has attended one or more virtual workshop events
update_responsive_points('Virtual_Workshop_Participant__c', 1.5)  

0.0      279
0.5    25958
1.0      192
1.5     3680
2.0      197
3.0       44
Name: Responsive_Point, dtype: int64


In [723]:
#Client has attended / participated in a virtual career fair
update_responsive_points('VCF_Participant__c', 1.5)  

0.0      278
0.5    25688
1.0      176
1.5     3439
2.0      429
2.5       16
3.0      268
3.5       38
4.5       18
Name: Responsive_Point, dtype: int64


In [724]:
#Client has attended one or more webinar events
update_responsive_points('Webinar_Participant__c', 1.5)  

0.0      278
0.5    25587
1.0      174
1.5     3372
2.0      508
2.5       17
3.0      315
3.5       54
4.0        1
4.5       37
5.0        6
6.0        1
Name: Responsive_Point, dtype: int64


In [725]:
#Participated in a mentoring session or mock phone interview
update_responsive_points('Used_Volunteer_Services__c', 2) 

0.0      272
0.5    23128
1.0      119
1.5     2735
2.0      347
2.5     2471
3.0      274
3.5      668
4.0      168
4.5       23
5.0       99
5.5       23
6.5       19
7.0        3
8.0        1
Name: Responsive_Point, dtype: int64


In [726]:
#Job Seeking client had a federal resume reviewed by HHUSA team
update_responsive_points('Used_Federal_Services__c', 2)

0.0      272
0.5    22331
1.0      111
1.5     2492
2.0      313
2.5     3071
3.0      246
3.5      817
4.0      181
4.5      215
5.0      116
5.5      113
6.0       21
6.5       20
7.0       22
7.5        4
8.0        1
8.5        4
Name: Responsive_Point, dtype: int64


In [727]:
# How many times Staff member has attempted to reach contact by phone (successful AND unsuccessful)
for index, rowData in confirmed_hire.iterrows():
    if rowData['ringdna100__Call_Attempts__c'] > 10.0:
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + 2
print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

0.0       272
0.5     22116
1.0       111
1.5      2403
2.0       300
2.5      3172
3.0       226
3.5       829
4.0       170
4.5       311
5.0       116
5.5       173
6.0        40
6.5        33
7.0        38
7.5        20
8.0         6
8.5         7
9.0         4
9.5         1
10.5        2
Name: Responsive_Point, dtype: int64


In [728]:
# How many times Staff member has attempted to reach contact by phone (successful AND unsuccessful)
for index, rowData in confirmed_hire.iterrows():
    if rowData['ringdna100__Email_Attempts__c'] > 10.0:
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + 2
print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

0.0       272
0.5     21514
1.0       104
1.5      2160
2.0       261
2.5      3274
3.0       187
3.5       804
4.0       140
4.5       664
5.0       109
5.5       333
6.0        78
6.5       155
7.0        64
7.5       113
8.0        32
8.5        28
9.0        27
9.5        15
10.0        5
10.5        4
11.0        4
11.5        1
12.5        2
Name: Responsive_Point, dtype: int64


In [732]:
for index, rowData in confirmed_hire.iterrows():
    if rowData['Days_To_Grey']> 0 & rowData['Days_To_Grey'] < 30:
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] - 2
print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

-2.0        55
-1.5      2668
-1.0        20
-0.5       286
 0.0       259
 0.5     19284
 1.0       109
 1.5      1976
 2.0       236
 2.5      2930
 3.0       178
 3.5       740
 4.0       136
 4.5       600
 5.0       100
 5.5       315
 6.0        70
 6.5       130
 7.0        59
 7.5        93
 8.0        28
 8.5        23
 9.0        26
 9.5        15
 10.0        4
 10.5        4
 11.0        3
 11.5        1
 12.5        2
Name: Responsive_Point, dtype: int64


In [736]:
for index, rowData in confirmed_hire.iterrows():
    if rowData['Days_To_Black']> 0 & rowData['Days_To_Black'] < 30:
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] - 3
print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

-4.5         6
-3.5         3
-3.0         2
-2.5        56
-2.0        55
-1.5      2675
-1.0        20
-0.5       310
 0.0       260
 0.5     19237
 1.0       112
 1.5      1970
 2.0       237
 2.5      2905
 3.0       177
 3.5       733
 4.0       134
 4.5       593
 5.0        99
 5.5       313
 6.0        69
 6.5       128
 7.0        58
 7.5        93
 8.0        28
 8.5        23
 9.0        25
 9.5        15
 10.0        4
 10.5        4
 11.0        3
 11.5        1
 12.5        2
Name: Responsive_Point, dtype: int64


In [737]:
confirmed_hire['Highest_Level_of_Education_Completed__c'].value_counts()

High School/GED                                                                                                                                                                                                                                  9830
4 Year Degree (BA, BS, etc.)                                                                                                                                                                                                                     8765
Post-Graduate Degree (MA, MS, JD, etc.)                                                                                                                                                                                                          4771
2 Year Degree (AA, AS, etc.)                                                                                                                                                                                                                     4546
Doctorate (PhD, 

In [738]:
for index, rowData in confirmed_hire.iterrows():
    if rowData['Highest_Level_of_Education_Completed__c'] == "Doctorate (PhD, MD, etc.)":
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + 10
    elif rowData['Highest_Level_of_Education_Completed__c'] == "Post-Graduate Degree (MA, MS, JD, etc.)":
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + 8
    elif rowData['Highest_Level_of_Education_Completed__c'] == "4 Year Degree (BA, BS, etc.)":
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + 6
    elif rowData['Highest_Level_of_Education_Completed__c'] == "2 Year Degree (AA, AS, etc.)":
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] + 4
    elif rowData['Highest_Level_of_Education_Completed__c'] == "High School/GED":
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] - 4
print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

-8.5        2
-6.5       21
-6.0        9
-5.5     1033
-5.0        5
-4.5       96
-4.0       43
-3.5     6797
-3.0       39
-2.5      535
-2.0       86
-1.5      904
-1.0       27
-0.5      182
 0.0      167
 0.5     1838
 1.0       27
 1.5       91
 2.0       20
 2.5      552
 3.0        9
 3.5      107
 4.0       42
 4.5     3563
 5.0       25
 5.5      429
 6.0       46
 6.5     5895
 7.0       65
 7.5      815
 8.0      127
 8.5     3737
 9.0       88
 9.5      698
 10.0     128
 10.5     953
 11.0      96
 11.5     340
 12.0      59
 12.5     229
 13.0      66
 13.5     133
 14.0      40
 14.5      55
 15.0      37
 15.5      41
 16.0      16
 16.5      10
 17.0       9
 17.5       5
 18.0       4
 18.5       2
 19.0       3
 19.5       2
 20.5       2
Name: Responsive_Point, dtype: int64


In [739]:
for index, rowData in confirmed_hire.iterrows():
    if rowData['Months_Unemployed__c']> 6.0:
        confirmed_hire.at[index,'Responsive_Point'] = rowData['Responsive_Point'] - 3
print(confirmed_hire['Responsive_Point'].value_counts().sort_index())

-8.5       39
-8.0        2
-7.5        7
-7.0        1
-6.5      134
-6.0       13
-5.5     1013
-5.0        5
-4.5      109
-4.0       42
-3.5     6690
-3.0       36
-2.5      524
-2.0       85
-1.5      887
-1.0       27
-0.5      200
 0.0      168
 0.5     1835
 1.0       27
 1.5      166
 2.0       21
 2.5      539
 3.0        8
 3.5      214
 4.0       43
 4.5     3515
 5.0       29
 5.5      504
 6.0       48
 6.5     5812
 7.0       67
 7.5      819
 8.0      131
 8.5     3673
 9.0       86
 9.5      677
 10.0     129
 10.5     925
 11.0      90
 11.5     320
 12.0      63
 12.5     224
 13.0      61
 13.5     129
 14.0      37
 14.5      53
 15.0      32
 15.5      40
 16.0      17
 16.5       8
 17.0       9
 17.5       5
 18.0       4
 18.5       2
 19.0       2
 19.5       2
 20.5       2
Name: Responsive_Point, dtype: int64


In [740]:
confirmed_hire['Case_Type'] = "Medium"
for index, rowData in confirmed_hire.iterrows():
    if rowData['Responsive_Point'] < 0.0:
        confirmed_hire.at[index,'Case_Type'] = "Hard"
    if rowData['Responsive_Point'] > 5.0:
        confirmed_hire.at[index,'Case_Type'] = "Easy"
print(confirmed_hire['Case_Type'].value_counts())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Easy      13971
Hard       9814
Medium     6565
Name: Case_Type, dtype: int64


In [743]:
table = pd.crosstab(confirmed_hire['Case_Owner'], columns=confirmed_hire['Case_Type'])
table

Case_Type,Easy,Hard,Medium
Case_Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0050z000006tEPEAA2,9,7,0
0050z0000078xG2AAI,2,4,1
0050z0000078xGMAAY,3,3,0
0050z00000790U2AAI,3,1,4
0050z00000796uVAAQ,1,1,0
0050z00000796uaAAA,1,3,1
0050z00000796vTAAQ,4,3,1
0050z00000796vdAAA,0,0,2
0050z0000079ElWAAU,1,0,0
0050z0000079ElgAAE,2,0,0
