In [2]:
import pandas as pd
import recordlinkage
pd.options.display.max_columns = None

### Load Data fiels

In [3]:
ama = pd.read_stata('ama13.dta')

In [4]:
meetings = pd.read_excel('speakerData.xlsx')
meetings = meetings.drop("meetingID", axis='columns')
meetings['nonus'] = ~(meetings.us==1)

In [5]:
ama.loc[ama.npi=="1528185246"]

Unnamed: 0,ResearchID,npi,MailFirstName,MailMiddleName,MailLastName,AddressType,MailAddress1,MailZipCode,MailZip5,Undeliverable,FIPSCounty,FIPSCity,FIPSState,CensusRegion,CensusDivision,CensusGroup,CensusTract,CensusSuffix,CensusBlockGroup,CensusEnumerationGroup,cmsa,PMSA_MSA,smsa,msa,DOFlag,BirthDate,BirthCity,BirthState,BirthCountry,Sex,DeadFlag,LicPrefState,LicPrefYear,LicGrpState,LicGrpYear,LicHospState,LicHospYear,LicAltOffState,LicAltOffYear,PrimaryTOP,PresentEmployment,PrimarySpecialty,SecondarySpecialty,tops,HospitalID,HospitalHours,PRAFlag,PRAExpiration,GroupID1,GroupID2,MedTrainFlag,MedTrainFrom,MedTrainTo,MedTrainYearInProgram,MedTrainPostGradYear,MedTrainSpec1,MedTrainSpec2,MedTrainingInstitutionCode,MedSchoolID,MedSchoolYOG,ContactFlag,NumberOfOffices,OfficeState,OfficeZipCode,USTrained,ResidencyTrainingState,MedicalSchoolState,OmitFlag,yob,yot
559538,3740050891,1528185246,James,Francis,Burke,2,,48103-3147,48103,0,161,,26,2.0,3.0,6,4007,0,0,,35,440,,A,0,5844.0,CLEVELAND,OH,US1,M,0,MI,2009,,,,,MI,2009,20,110,VN,US,PO,,0,0,,,,0,,18443.0,0,0,,,250256,1643,2005,1,0,MI,48109-5002,0,MI,IL,,1976,2010.0


### Step 2: filter ama list to neurology and related specialities.

Neurology specialities from AMA masterfile

<li>MN	Internal Medicine - Neurology</li>
<li>N	Neurology</li>
<li>NDN	Neurodevelopmental Disabilities</li>
<li>NDP	Neurodevelopmental Disabilities</li>
<li>NMN	Neuromuscliar Medicine</li>
<li>NNM	Neurology / Nuclear Medicine</li>
<li>NS	Neurological Surgery</li>
<li>NSP	Pediatric Neurological Surgery</li>
<li>NUP	Neuropsychiatry</li>
<li>PMN	Pain Medicine (Neurology)</li>
<li>PYN	Psychiatry - Neurology</li>
<li>SMN	Sleep Medicine (Psych & Neurology)</li>
<li>SMA/SME	Sleep Medicine )</li>
<li>HPN Hospice and palliative neurology</li>

<li>VN	Vascliar Neurology</li>
<li>CHN Child neurology</li>
<li> EPL Epilepsy</li>
<li>NO Neuro-otology</li>

In [7]:
ama['MailFirstName'] = ama['MailFirstName'].str.lower()
ama['MailLastName'] = ama['MailLastName'].str.lower()
ama['MailMiddleName'] = ama['MailMiddleName'].str.lower()
meetings['firstName'] = meetings['firstName'].str.lower()
meetings['lastName'] = meetings['lastName'].str.lower()
ama['middleInitial'] = ama['MailMiddleName'].str[:1]
# new data frame with split value columns 
splitName = meetings["firstName"].str.split(" ", n = 1, expand = True) 
# making seperate first name column from new data frame 
meetings["firstName"]= splitName[0] 
meetings["middleInitial"]= splitName[1].str.strip(".")
meetings["middleInitial"]= meetings['middleInitial'].str[:1]
ama['female'] = ama['Sex'] == 'F'
ama['female'] = ama['female'].astype(int)
# the Year of graduation field looks awful...
ama['MedSchoolYOG'] = ama['MedSchoolYOG'].astype(int)
ama['YOGEst'] = ama.yot - 4

In [8]:
neuroSpecialties = ['MN', 'N', 'NDN', 'NMN', 'NNM', 'NS', 'NSP', 'NUP', 'PMN', 'PYN', 'SMN', 'VN', 'CHN', 'EPL', 'SMA', 'SME', 'HPN', 'NO']
primaryNeuro = ama.loc[ama['PrimarySpecialty'].isin(neuroSpecialties)]
secondaryNeuro = ama.loc[ama['SecondarySpecialty'].isin(neuroSpecialties)]
allNeuro = pd.concat([primaryNeuro,secondaryNeuro]).drop_duplicates().reset_index(drop=True)

In [9]:
meetings['totalNonMissing'] = meetings[['lastName', 'firstName', 'middleInitial', 'female', 'medGradYear']].notnull().sum(axis=1)
meetings['weightedNonMissing'] = meetings['middleInitial'].notnull() * 0.25 + 3.05

### Match meetings to the file of neurologists

In [10]:
usmeetings = meetings.loc[meetings.us==1] 
meetingAMAMatchIndex = recordlinkage.index.SortedNeighbourhood(left_on='lastName', right_on='MailLastName', window=11)
personMatch = meetingAMAMatchIndex.index(usmeetings, allNeuro)
personMatch.names = ['meetingID', 'amaID']
dfTest = pd.DataFrame(index=personMatch)
allNeuro.index.name = 'amaID'

In [11]:
personComp = recordlinkage.Compare(n_jobs=12)
personComp.string('lastName', 'MailLastName', method='jarowinkler')
personComp.string('firstName', 'MailFirstName', method='jarowinkler')
personComp.string('middleInitial', 'middleInitial', method='jarowinkler')
personComp.exact('female', 'female')
personComp.numeric('medGradYear', 'YOGEst', method='linear', offset=2, scale=2)

personCompared = personComp.compute(personMatch, usmeetings, allNeuro)
personCompared = personCompared.rename({0:"lastNameWeight", 1:"firstNameWeight", 2:"middleInitialWeight", 3:"femaleWeight", 4:"medGradWeight"}, axis='columns')
personCompared['totalWeight'] = personCompared.lastNameWeight + personCompared.firstNameWeight + personCompared.middleInitialWeight + personCompared.femaleWeight + personCompared.medGradWeight
personCompared['arbWeight'] = personCompared.lastNameWeight + personCompared.firstNameWeight * 0.8 + personCompared.middleInitialWeight * 0.25 + personCompared.femaleWeight + personCompared.medGradWeight * 0.25

In [12]:
joined = personCompared.join(usmeetings, on='meetingID', how='left').join(allNeuro, on='amaID', how='left', lsuffix='left', rsuffix='right')

In [13]:
joined['percentWeight'] = joined.totalWeight / joined.totalNonMissing
joined['arbPercentWeight'] = joined.arbWeight / joined.weightedNonMissing
joined.sort_values(['meetingID', 'percentWeight', 'arbPercentWeight'], ascending=False, inplace=True)
simpleJoin = joined[[ 'percentWeight', 'arbPercentWeight', 'lastName', 'firstName', 'femaleleft', 'middleInitialleft', 'medGradYear', 'MailFirstName', 'MailLastName', 'middleInitialright', 'femaleright', 'YOGEst']]


### Multiple manual comparisons bewteen matches using the (commented out to make notebook more succinct) code below, generating the file matchData.xlsx

In [14]:
#simpleJoin.loc[(1490,)]

In [15]:
#joined.loc[(1490,)]

In [16]:
joined['rankWithinMeeting'] = joined.groupby(["meetingID"]).cumcount()
joined['nextPercentWeight'] = joined.groupby('meetingID')['percentWeight'].shift(-1) 
joined['nextPercentArb'] = joined.groupby('meetingID')['arbPercentWeight'].shift(-1) 
joined['weightDelta'] = joined.percentWeight - joined.nextPercentWeight 
joined['arbDelta'] = joined.arbPercentWeight - joined.nextPercentArb
joined['exactNameMatch'] = (joined.lastNameWeight==1) & (joined.firstNameWeight==1)

In [17]:
exactMatchesForMeeting = joined.groupby("meetingID").exactNameMatch.sum()
exactMatchesForMeeting.rename("exactMatchesForMeeting", inplace=True)
joined = joined.join(other=exactMatchesForMeeting, on='meetingID', how='left')

In [18]:
bestMatchForMeeting = joined.loc[joined['rankWithinMeeting'] == 0]

In [19]:
#matchCharacteristics[['arb','match', 'weight', 'weightDelta', 'arbDelta']].groupby(['match']).describe()

1. The highest unmathced weight is 0.89 and the highest unmathced arb is 0.91...so, we can use those as simple threshods for "if you're above this...call it a match"

2. Matches also have higher "gaps" between themselves and the next best match. so, we can probably also build a criterion on a large "gap" and a slightly lowe weight. Maybe go down to the lowest matched weight (0.73) and arb (0.83) with a 25th percentile matched delta (arb 0.27, weight 0.25) which is larger than the 75th percentile for non matches.

### make matches. in neuro


In [20]:
nearExactWeightMatches = bestMatchForMeeting.loc[bestMatchForMeeting.percentWeight > 0.90]
nearExactArbWeightMatches = bestMatchForMeeting.loc[bestMatchForMeeting.arbPercentWeight > 0.92]

moderateWeightMatchWithLargeGap = bestMatchForMeeting.loc[(bestMatchForMeeting.percentWeight <= 0.90) & (bestMatchForMeeting.percentWeight > 0.73) & (bestMatchForMeeting.weightDelta > 0.25)]
moderateArbMatchWithLargeGap = bestMatchForMeeting.loc[(bestMatchForMeeting.arbPercentWeight <= 0.92) & (bestMatchForMeeting.arbPercentWeight > 0.83) & (bestMatchForMeeting.arbDelta > 0.27)]

exactAndOnlyNameMatch = bestMatchForMeeting.loc[(bestMatchForMeeting.exactNameMatch) & (bestMatchForMeeting.exactMatchesForMeeting==1)]

neuroMatches = pd.concat([nearExactArbWeightMatches, nearExactWeightMatches, moderateArbMatchWithLargeGap, moderateWeightMatchWithLargeGap, exactAndOnlyNameMatch]).drop_duplicates()

60% seem are matched using pretty solid near exact matches. will 

In [21]:
len(neuroMatches)/len(usmeetings)

0.6292042657916325

### now try to match the unmatched to the non-neuro ama file

In [22]:
unmatchedMeetings = usmeetings.loc[~usmeetings.index.isin(neuroMatches.index.get_level_values(0))]
nonNeuro = ama.loc[ama.ResearchID.isin(set(ama.ResearchID) - set(allNeuro.ResearchID))]

In [23]:
nonNeuroMeetingAMAMatch = recordlinkage.index.SortedNeighbourhood(left_on='lastName', right_on='MailLastName', window=11)
nonNeuroPersonMatch = nonNeuroMeetingAMAMatch.index(unmatchedMeetings, nonNeuro)
nonNeuroPersonMatch.names = ['meetingID', 'amaID']

In [None]:
nonNeuroComp = recordlinkage.Compare(n_jobs=12)
nonNeuroComp.string('lastName', 'MailLastName', method='jarowinkler')
nonNeuroComp.string('firstName', 'MailFirstName', method='jarowinkler')
nonNeuroComp.string('middleInitial', 'middleInitial', method='jarowinkler')
nonNeuroComp.exact('female', 'female')
nonNeuroComp.numeric('medGradYear', 'YOGEst', method='linear', offset=2, scale=2)

nonNeuroCompared = nonNeuroComp.compute(nonNeuroPersonMatch, unmatchedMeetings, nonNeuro)

In [None]:
nonNeuroCompared = nonNeuroCompared.rename({0:"lastNameWeight", 1:"firstNameWeight", 2:"middleInitialWeight", 3:"femaleWeight", 4:"medGradWeight"}, axis='columns')
nonNeuroCompared['totalWeight'] = nonNeuroCompared.lastNameWeight + nonNeuroCompared.firstNameWeight + nonNeuroCompared.middleInitialWeight + nonNeuroCompared.femaleWeight + nonNeuroCompared.medGradWeight
nonNeuroCompared['arbWeight'] = nonNeuroCompared.lastNameWeight + nonNeuroCompared.firstNameWeight * 0.8 + nonNeuroCompared.middleInitialWeight * 0.25 + nonNeuroCompared.femaleWeight + nonNeuroCompared.medGradWeight * 0.25

In [None]:
nonNeuroJoined = nonNeuroCompared.join(unmatchedMeetings, on='meetingID', how='left').join(nonNeuro, on='amaID', how='left', lsuffix='left', rsuffix='right')

nonNeuroJoined['percentWeight'] = nonNeuroJoined.totalWeight / nonNeuroJoined.totalNonMissing
nonNeuroJoined['arbPercentWeight'] = nonNeuroJoined.arbWeight / nonNeuroJoined.weightedNonMissing

nonNeuroJoined.sort_values(['meetingID', 'percentWeight', 'arbPercentWeight'], ascending=False, inplace=True)
nonNeuroSimpleJoin = nonNeuroJoined[[ 'percentWeight', 'arbPercentWeight', 'lastName', 'firstName', 'femaleleft', 'middleInitialleft', 'medGradYear', 'MailFirstName', 'MailLastName', 'middleInitialright', 'femaleright', 'YOGEst']]

In [None]:
nonNeuroSimpleJoin.index.get_level_values(0).unique()

In [32]:
#nonNeuroSimpleJoin.loc[(44,)]

In [None]:
nonNeuroJoined.sort_values(['meetingID', 'arbPercentWeight', 'percentWeight'], ascending=False, inplace=True)
nonNeuroJoined['rankWithinMeeting'] = nonNeuroJoined.groupby(["meetingID"]).cumcount()
nonNeuroJoined['nextPercentWeight'] = nonNeuroJoined.groupby('meetingID')['percentWeight'].shift(-1) 
nonNeuroJoined['nextPercentArb'] = nonNeuroJoined.groupby('meetingID')['arbPercentWeight'].shift(-1) 
nonNeuroJoined['weightDelta'] = nonNeuroJoined.percentWeight - nonNeuroJoined.nextPercentWeight 
nonNeuroJoined['arbDelta'] = nonNeuroJoined.arbPercentWeight - nonNeuroJoined.nextPercentArb

bestNonNeuroMatchForMeeting = nonNeuroJoined.loc[nonNeuroJoined['rankWithinMeeting'] == 0]

In [None]:
nonNeuroNearExactWeightMatches = bestNonNeuroMatchForMeeting.loc[(bestNonNeuroMatchForMeeting.percentWeight > 0.90) & (bestNonNeuroMatchForMeeting.weightDelta > 0.01)]
nonNeuroNearExactArbWeightMatches = bestNonNeuroMatchForMeeting.loc[(bestNonNeuroMatchForMeeting.arbPercentWeight > 0.92) & (bestNonNeuroMatchForMeeting.arbDelta > 0.01)]

nonNeuroModerateWeightMatchWithLargeGap = bestNonNeuroMatchForMeeting.loc[(bestNonNeuroMatchForMeeting.percentWeight <= 0.90) & (bestNonNeuroMatchForMeeting.percentWeight > 0.73) & (bestNonNeuroMatchForMeeting.weightDelta > 0.25)]
nonNeuroModerateArbMatchWithLargeGap = bestNonNeuroMatchForMeeting.loc[(bestNonNeuroMatchForMeeting.arbPercentWeight <= 0.92) & (bestNonNeuroMatchForMeeting.arbPercentWeight > 0.83) & (bestNonNeuroMatchForMeeting.arbDelta > 0.27)]

nonNeuroMatches = pd.concat([nonNeuroNearExactWeightMatches, nonNeuroNearExactArbWeightMatches, nonNeuroModerateWeightMatchWithLargeGap, nonNeuroModerateArbMatchWithLargeGap]).drop_duplicates()

In [None]:
len(nonNeuroMatches)/len(usmeetings)

In [None]:
unmatchedMeetings2 = unmatchedMeetings.loc[~unmatchedMeetings.index.isin(nonNeuroMatches.index.get_level_values(0))]
len(unmatchedMeetings2)/len(usmeetings)

In [37]:
#unmatchedMeetings2.tail(50)

### factors we can match on directly: 
<ol>
    <li>first name</li>
    <li>last name</li>
    <li>middle initial</li>
    <li> sex </li>
    <li>med school year of graduation</li>
</ol>

### factors that we can match/filter on somewhat: 
<ol>
    <li> residency training institution vs. current institution (assuming there is a lot of overlap)</li>
    <li> curent hospital name...althjough huge limitation that its missing for 85% of the samle</li>
    <li> current mailing zip code (ama masterfile) vs. hospital zip code (will n eed to be matched into speaker file</li>
    <li>US trained in both files</li>
    <li> maybe there is a way to link the academic filter? although probably not that useful given its prevalence</li>
</ol>

    

In [38]:
meetings['neuroMatch'] = meetings.index.isin(neuroMatches.index.get_level_values(0))
meetings['nonNeuroMatch'] =meetings.index.isin(nonNeuroMatches.index.get_level_values(0))
meetings['unmatched'] = meetings.index.isin(unmatchedMeetings2.index)
meetings[['neuroMatch','nonNeuroMatch', 'unmatched', 'nonus']] = meetings[['neuroMatch','nonNeuroMatch', 'unmatched', 'nonus']].astype(int)
meetings['matchCategory'] = meetings[['neuroMatch','nonNeuroMatch', 'unmatched', 'nonus']].idxmax(axis=1)

### How was the overall match?

In [39]:
meetings.matchCategory.value_counts()

neuroMatch       763
nonus            275
nonNeuroMatch    247
unmatched        209
Name: matchCategory, dtype: int64

### Compare the speakers data across the categories of matches...

1. We have a small problem with the unmatched group where women are more common
2. Women speakers are less common amongst non-US speakers...
3. Non-US speakers have higher pub counts
4. non-neuro meetings have more recnet grads with fewer pubs

In [40]:
import numpy as np
meetings.replace(-1, np.nan, inplace=True)
meetings.pivot_table(index=meetings.matchCategory, dropna=True, aggfunc=( 'mean', 'std')).transpose()

Unnamed: 0,matchCategory,neuroMatch,nonNeuroMatch,nonus,unmatched
academic,mean,0.955439,0.947368,0.989091,0.937799
academic,std,0.206473,0.22375,0.104065,0.2421
female,mean,0.284404,0.303644,0.185654,0.364078
female,std,0.451426,0.460764,0.38965,0.482343
honorificSpeaker,mean,0.0,0.0,0.0,0.0
honorificSpeaker,std,0.0,0.0,0.0,0.0
leadership,mean,0.369594,0.1417,0.047273,0.110048
leadership,std,0.709677,0.502192,0.272763,0.395096
medGradYear,mean,1992.120211,1995.636735,1992.920455,1992.896552
medGradYear,std,10.93536,10.58236,9.135282,11.422556


### Explore the distribution of women in the neurology sample of the AMA masterfile

In [41]:
allNeuro.female.mean()

0.25541018931814685

25.5% (8297/ of all of the neurologists in the ama masterfile for neurology specialties are female

![image.png](attachment:image.png)

### Look at distribution of females by AMA type of service...not terribly enlightening

In [42]:
pd.crosstab(allNeuro.tops, allNeuro.female, dropna=True)

female,0,1
tops,Unnamed: 1_level_1,Unnamed: 2_level_1
IN,3658,524
LO,20,7
,195,50
NC,962,742
NO,86,34
NR,714,148
NT,295,120
PI,596,416
PO,12359,3855
PR,2377,1462


In [43]:
pd.crosstab(allNeuro.tops, allNeuro.female, dropna=True).apply(lambda r: r/r.sum(), axis=1)

female,0,1
tops,Unnamed: 1_level_1,Unnamed: 2_level_1
IN,0.874701,0.125299
LO,0.740741,0.259259
,0.795918,0.204082
NC,0.564554,0.435446
NO,0.716667,0.283333
NR,0.828306,0.171694
NT,0.710843,0.289157
PI,0.588933,0.411067
PO,0.762243,0.237757
PR,0.619172,0.380828


### Look at distribution of females by AMA type of primary specialty

Neurology specialities from AMA masterfile

<li>MN	Internal Medicine - Neurology</li>
<li>N	Neurology</li>
<li>NDN	Neurodevelopmental Disabilities</li>
<li>NDP	Neurodevelopmental Disabilities</li>
<li>NMN	Neuromuscliar Medicine</li>
<li>NNM	Neurology / Nuclear Medicine</li>
<li>NS	Neurological Surgery</li>
<li>NSP	Pediatric Neurological Surgery</li>
<li>NUP	Neuropsychiatry</li>
<li>PMN	Pain Medicine (Neurology)</li>
<li>PYN	Psychiatry - Neurology</li>
<li>SMN	Sleep Medicine (Psych & Neurology)</li>
<li>VN	Vascliar Neurology</li>
<li>CHN Child neurology</li>

In [44]:
pd.crosstab(allNeuro.PrimarySpecialty, allNeuro.female, dropna=True).sort_values(0, ascending=False).head(10)

female,0,1
PrimarySpecialty,Unnamed: 1_level_1,Unnamed: 2_level_1
N,13179,5542
NS,7664,721
CHN,1156,1134
P,513,92
VN,458,227
NMN,173,209
PD,106,64
IM,85,18
OPH,75,15
CN,69,37


In [45]:
pd.crosstab(allNeuro.PrimarySpecialty, allNeuro.female, dropna=True).sort_values(0, ascending=False).apply(lambda r: r/r.sum(), axis=1).head(10)

female,0,1
PrimarySpecialty,Unnamed: 1_level_1,Unnamed: 2_level_1
N,0.703969,0.296031
NS,0.914013,0.085987
CHN,0.504803,0.495197
P,0.847934,0.152066
VN,0.668613,0.331387
NMN,0.45288,0.54712
PD,0.623529,0.376471
IM,0.825243,0.174757
OPH,0.833333,0.166667
CN,0.650943,0.349057


In [46]:
pd.crosstab(allNeuro.PrimarySpecialty, allNeuro.female, dropna=True).sort_values(0, ascending=False).head(10)

female,0,1
PrimarySpecialty,Unnamed: 1_level_1,Unnamed: 2_level_1
N,13179,5542
NS,7664,721
CHN,1156,1134
P,513,92
VN,458,227
NMN,173,209
PD,106,64
IM,85,18
OPH,75,15
CN,69,37


So, women make up 30% of neurologists, 8.5% of neurosurgeons and 50% of child neuroloy

In [47]:
allNeuro['yog10'] = pd.qcut(allNeuro.YOGEst, 10)
allNeuro['yog10Count'] = pd.qcut(allNeuro.YOGEst, 10, labels=range(0,10))

Looking at the % of women based on year of med school graduation — interestingly, the % of women in neurology fell off in 2014+, after steadily increasing prior to that time... 

In [48]:
allNeuro[['yog10', 'female']].groupby("yog10").mean()

Unnamed: 0_level_0,female
yog10,Unnamed: 1_level_1
"(1927.999, 1969.0]",0.062481
"(1969.0, 1976.0]",0.090768
"(1976.0, 1983.0]",0.179709
"(1983.0, 1989.0]",0.211999
"(1989.0, 1994.0]",0.261975
"(1994.0, 1999.0]",0.284381
"(1999.0, 2005.0]",0.337578
"(2005.0, 2010.0]",0.393607
"(2010.0, 2014.0]",0.410699
"(2014.0, 2019.0]",0.35708


In [49]:
rightMatch = neuroMatches[['lastName', 'firstName', 'meeting', 'homeInstitution', 'professionalRank', 'medGradYear', 'pubsScopus', 'scopusHIndex', 'leadership', 'speaker', 'honorificSpeaker', 'academic', 'us']].join(allNeuro, on='amaID', how='right')
rightMatch['matched'] = ~(rightMatch.lastName.isna())
rightMatch.reset_index(inplace=True)
rightMatch.reset_index(inplace=True)

def getCategory(x):
    matches = [x==y for y in ["N", 'NS', "CHN", 'P', 'VN', 'NMN']]
    return x if any(matches) else "Other"
rightMatch['simplifiedSpecialty'] = rightMatch.PrimarySpecialty.apply(getCategory)


### Primnary analysis — accounting for specialty and years since graduation...is there an effect of gender on the probability of presenting at a meeting

In [50]:
import statsmodels.formula.api as smf
import statsmodels.tools.tools as tools
#rightMatch['yog10'].astype('category')
#rightMatch = rightMatch[['matched', 'female', 'yog10Count']].dropna()
rightMatch = rightMatch[['matched', 'female', 'yog10Count', 'simplifiedSpecialty', 'meeting']]
rightMatch['matched'] = rightMatch.matched.astype('int')
adjusted = smf.logit(formula='matched ~ female + C(yog10Count) + C(simplifiedSpecialty) ', data=rightMatch)
adjustedResult = adjusted.fit()
unadjusted = smf.logit(formula='matched ~ female', data=rightMatch)
unadjustedResult = unadjusted.fit()
unadjustedResult.summary()

Optimization terminated successfully.
         Current function value: 0.103625
         Iterations 11
Optimization terminated successfully.
         Current function value: 0.111105
         Iterations 8


0,1,2,3
Dep. Variable:,matched,No. Observations:,32547.0
Model:,Logit,Df Residuals:,32545.0
Method:,MLE,Df Model:,1.0
Date:,"Wed, 12 Dec 2018",Pseudo R-squ.:,0.0004236
Time:,16:04:03,Log-Likelihood:,-3616.1
converged:,True,LL-Null:,-3617.7
,,LLR p-value:,0.08001

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.7681,0.043,-87.129,0.000,-3.853,-3.683
female,0.1439,0.081,1.769,0.077,-0.016,0.303


In [51]:
adjustedResult.summary()

0,1,2,3
Dep. Variable:,matched,No. Observations:,31714.0
Model:,Logit,Df Residuals:,31697.0
Method:,MLE,Df Model:,16.0
Date:,"Wed, 12 Dec 2018",Pseudo R-squ.:,0.07893
Time:,16:04:03,Log-Likelihood:,-3286.4
converged:,True,LL-Null:,-3568.0
,,LLR p-value:,1.419e-109

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.8983,0.295,-16.598,0.000,-5.477,-4.320
C(yog10Count)[T.1],0.9035,0.312,2.896,0.004,0.292,1.515
C(yog10Count)[T.2],1.7650,0.286,6.171,0.000,1.204,2.326
C(yog10Count)[T.3],1.9190,0.286,6.717,0.000,1.359,2.479
C(yog10Count)[T.4],1.9211,0.287,6.701,0.000,1.359,2.483
C(yog10Count)[T.5],2.2683,0.283,8.013,0.000,1.713,2.823
C(yog10Count)[T.6],2.1031,0.284,7.415,0.000,1.547,2.659
C(yog10Count)[T.7],1.4556,0.299,4.870,0.000,0.870,2.041
C(yog10Count)[T.8],0.2785,0.334,0.833,0.405,-0.377,0.934


In [52]:
adjustedYOG = smf.logit(formula='matched ~ female + C(yog10Count) ', data=rightMatch)
yogResult = adjustedYOG.fit()
yogResult.summary()

Optimization terminated successfully.
         Current function value: 0.106680
         Iterations 11


0,1,2,3
Dep. Variable:,matched,No. Observations:,31714.0
Model:,Logit,Df Residuals:,31703.0
Method:,MLE,Df Model:,10.0
Date:,"Wed, 12 Dec 2018",Pseudo R-squ.:,0.05177
Time:,16:33:08,Log-Likelihood:,-3383.3
converged:,True,LL-Null:,-3568.0
,,LLR p-value:,2.9519999999999997e-73

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-5.4439,0.268,-20.320,0.000,-5.969,-4.919
C(yog10Count)[T.1],1.0597,0.312,3.402,0.001,0.449,1.670
C(yog10Count)[T.2],1.9348,0.286,6.775,0.000,1.375,2.495
C(yog10Count)[T.3],2.0803,0.285,7.295,0.000,1.521,2.639
C(yog10Count)[T.4],2.0860,0.286,7.292,0.000,1.525,2.647
C(yog10Count)[T.5],2.4105,0.282,8.534,0.000,1.857,2.964
C(yog10Count)[T.6],2.2549,0.283,7.978,0.000,1.701,2.809
C(yog10Count)[T.7],1.6806,0.294,5.723,0.000,1.105,2.256
C(yog10Count)[T.8],0.5242,0.332,1.581,0.114,-0.126,1.174


In [55]:
adjustedSpec = smf.logit(formula='matched ~ female + C(simplifiedSpecialty) ', data=rightMatch)
adjustedSpecResult = adjustedSpec.fit()
adjustedSpecResult.summary()

Optimization terminated successfully.
         Current function value: 0.107640
         Iterations 10


0,1,2,3
Dep. Variable:,matched,No. Observations:,32547.0
Model:,Logit,Df Residuals:,32539.0
Method:,MLE,Df Model:,7.0
Date:,"Wed, 12 Dec 2018",Pseudo R-squ.:,0.0316
Time:,16:33:40,Log-Likelihood:,-3503.3
converged:,True,LL-Null:,-3617.7
,,LLR p-value:,9.663000000000001e-46

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.4465,0.129,-26.667,0.000,-3.700,-3.193
C(simplifiedSpecialty)[T.N],0.0258,0.131,0.197,0.844,-0.231,0.283
C(simplifiedSpecialty)[T.NMN],-1.0552,0.518,-2.039,0.041,-2.069,-0.041
C(simplifiedSpecialty)[T.NS],-1.5667,0.186,-8.431,0.000,-1.931,-1.202
C(simplifiedSpecialty)[T.Other],-0.5516,0.237,-2.326,0.020,-1.016,-0.087
C(simplifiedSpecialty)[T.P],-2.9442,1.009,-2.919,0.004,-4.921,-0.967
C(simplifiedSpecialty)[T.VN],0.3898,0.224,1.740,0.082,-0.049,0.829
female,-0.0875,0.083,-1.055,0.292,-0.250,0.075


In [79]:
rightMatch2 = rightMatch.loc[rightMatch.simplifiedSpecialty == "Other"]
adjustedSpecNS = smf.logit(formula='matched ~ female + C(simplifiedSpecialty) ', data=rightMatch2)
adjustedSpecResultNS = adjustedSpecNS.fit()
adjustedSpecResultNS.summary()

Optimization terminated successfully.
         Current function value: 0.087863
         Iterations 8


0,1,2,3
Dep. Variable:,matched,No. Observations:,1418.0
Model:,Logit,Df Residuals:,1416.0
Method:,MLE,Df Model:,1.0
Date:,"Wed, 12 Dec 2018",Pseudo R-squ.:,0.009082
Time:,16:47:37,Log-Likelihood:,-124.59
converged:,True,LL-Null:,-125.73
,,LLR p-value:,0.1307

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.2302,0.260,-16.266,0.000,-4.740,-3.721
female,0.6412,0.413,1.553,0.120,-0.168,1.450


### Across the linked specialities, no associatio between gender and the probability of presenting before or after adjustment

In [71]:
rightMatch.simplifiedSpecialty.value_counts()

N        18775
NS        8386
CHN       2295
Other     1418
VN         686
P          605
NMN        382
Name: simplifiedSpecialty, dtype: int64

### Next, let's look at differences across meetings...

In [48]:
pd.crosstab(meetings.matchCategory, meetings.meeting)

meeting,AAN,AES,ISC,SNO,Sleep
matchCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
neuroMatch,391,133,178,33,28
nonNeuroMatch,60,35,51,34,67
nonus,18,57,161,22,17
unmatched,43,49,67,17,33


In [49]:
pd.crosstab(meetings.meeting, meetings.matchCategory ).apply(lambda r: r/r.sum(), axis=1)

matchCategory,neuroMatch,nonNeuroMatch,nonus,unmatched
meeting,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAN,0.763672,0.117188,0.035156,0.083984
AES,0.485401,0.127737,0.208029,0.178832
ISC,0.389497,0.111597,0.352298,0.146608
SNO,0.311321,0.320755,0.207547,0.160377
Sleep,0.193103,0.462069,0.117241,0.227586


In [50]:
pd.crosstab( meetings.meeting, meetings.female, dropna=True).apply(lambda r : r/r.sum(), axis=1)

female,0.0,1.0
meeting,Unnamed: 1_level_1,Unnamed: 2_level_1
AAN,0.683594,0.316406
AES,0.686347,0.313653
ISC,0.783848,0.216152
SNO,0.721154,0.278846
Sleep,0.696552,0.303448


### Most of the meetings look pretty good, but for the ISC...which also has a larger intertnainol presence

In [51]:
pd.crosstab( usmeetings.meeting, usmeetings.female, dropna=True).apply(lambda r : r/r.sum(), axis=1)

female,-1,0,1
meeting,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAN,0.0,0.684211,0.315789
AES,0.0,0.668203,0.331797
ISC,0.010135,0.733108,0.256757
SNO,0.0,0.738095,0.261905
Sleep,0.0,0.679688,0.320312


### AES + ISC + Sleep both do a bit betyter when you drop international presenters

In [52]:
meetingTypes =  ['AAN', 'ISC', 'AES', 'SNO'] # omitting sleep...because its quite tiny
results = {}

def predictIndividualMeetings(meetingType):
    matchVarName = 'matchded' + meetingType
    rightMatch[matchVarName] = (rightMatch.matched) & (rightMatch.meeting==meetingType)
    rightMatch[matchVarName] = rightMatch[matchVarName].astype('int')
    adjusted = smf.logit(formula=matchVarName + '~ female + C(yog10Count) + C(simplifiedSpecialty)', data=rightMatch)
    output = adjusted.fit()
    results[meetingType] = output

for meeting in meetingTypes:
    predictIndividualMeetings(meeting)

         Current function value: 0.059099
         Iterations: 35




         Current function value: 0.031597
         Iterations: 35




         Current function value: 0.024012
         Iterations: 35




         Current function value: 0.007065
         Iterations: 35




In [53]:
results['AAN'].summary()

0,1,2,3
Dep. Variable:,matchdedAAN,No. Observations:,31714.0
Model:,Logit,Df Residuals:,31697.0
Method:,MLE,Df Model:,16.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.105
Time:,09:03:09,Log-Likelihood:,-1874.3
converged:,False,LL-Null:,-2094.2
,,LLR p-value:,1.609e-83

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-5.8068,0.384,-15.140,0.000,-6.559,-5.055
C(yog10Count)[T.1],0.5313,0.368,1.444,0.149,-0.190,1.253
C(yog10Count)[T.2],1.5325,0.326,4.703,0.000,0.894,2.171
C(yog10Count)[T.3],1.4966,0.329,4.544,0.000,0.851,2.142
C(yog10Count)[T.4],1.0744,0.344,3.123,0.002,0.400,1.749
C(yog10Count)[T.5],1.6319,0.330,4.942,0.000,0.985,2.279
C(yog10Count)[T.6],1.6405,0.329,4.992,0.000,0.996,2.285
C(yog10Count)[T.7],1.4016,0.343,4.085,0.000,0.729,2.074
C(yog10Count)[T.8],-1.0671,0.543,-1.965,0.049,-2.131,-0.003


In [54]:
results['ISC'].summary()

0,1,2,3
Dep. Variable:,matchdedISC,No. Observations:,31714.0
Model:,Logit,Df Residuals:,31697.0
Method:,MLE,Df Model:,16.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.08038
Time:,09:03:10,Log-Likelihood:,-1002.1
converged:,False,LL-Null:,-1089.7
,,LLR p-value:,7.787e-29

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-7.7503,0.817,-9.484,0.000,-9.352,-6.149
C(yog10Count)[T.1],1.4135,0.782,1.806,0.071,-0.120,2.947
C(yog10Count)[T.2],1.6725,0.761,2.199,0.028,0.182,3.163
C(yog10Count)[T.3],2.2122,0.743,2.978,0.003,0.756,3.668
C(yog10Count)[T.4],2.6727,0.732,3.651,0.000,1.238,4.107
C(yog10Count)[T.5],2.8033,0.731,3.837,0.000,1.371,4.235
C(yog10Count)[T.6],2.6109,0.731,3.573,0.000,1.179,4.043
C(yog10Count)[T.7],1.2025,0.774,1.553,0.120,-0.315,2.720
C(yog10Count)[T.8],1.3136,0.766,1.714,0.087,-0.189,2.816


In [55]:
results['AES'].summary()

0,1,2,3
Dep. Variable:,matchdedAES,No. Observations:,31714.0
Model:,Logit,Df Residuals:,31697.0
Method:,MLE,Df Model:,16.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.1097
Time:,09:03:10,Log-Likelihood:,-761.52
converged:,False,LL-Null:,-855.31
,,LLR p-value:,2.5390000000000003e-31

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-6.3386,1.013,-6.257,0.000,-8.324,-4.353
C(yog10Count)[T.1],1.8017,1.070,1.684,0.092,-0.296,3.899
C(yog10Count)[T.2],2.6133,1.029,2.540,0.011,0.597,4.630
C(yog10Count)[T.3],2.9323,1.025,2.859,0.004,0.922,4.942
C(yog10Count)[T.4],3.1255,1.022,3.058,0.002,1.122,5.129
C(yog10Count)[T.5],3.4141,1.019,3.352,0.001,1.418,5.411
C(yog10Count)[T.6],2.6410,1.031,2.561,0.010,0.620,4.662
C(yog10Count)[T.7],1.2400,1.123,1.104,0.270,-0.962,3.442
C(yog10Count)[T.8],1.2481,1.101,1.134,0.257,-0.910,3.406


In [56]:
results['SNO'].summary()

0,1,2,3
Dep. Variable:,matchdedSNO,No. Observations:,31714.0
Model:,Logit,Df Residuals:,31697.0
Method:,MLE,Df Model:,16.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.1135
Time:,09:03:10,Log-Likelihood:,-224.07
converged:,False,LL-Null:,-252.74
,,LLR p-value:,1.451e-06

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-23.3995,1342.309,-0.017,0.986,-2654.276,2607.478
C(yog10Count)[T.1],-5.5630,2.21e+04,-0.000,1.000,-4.32e+04,4.32e+04
C(yog10Count)[T.2],-5.6065,2.14e+04,-0.000,1.000,-4.2e+04,4.2e+04
C(yog10Count)[T.3],15.4941,1342.309,0.012,0.991,-2615.382,2646.371
C(yog10Count)[T.4],16.2001,1342.309,0.012,0.990,-2614.676,2647.076
C(yog10Count)[T.5],16.4107,1342.309,0.012,0.990,-2614.466,2647.287
C(yog10Count)[T.6],16.4114,1342.309,0.012,0.990,-2614.465,2647.288
C(yog10Count)[T.7],16.5957,1342.309,0.012,0.990,-2614.281,2647.472
C(yog10Count)[T.8],-5.5588,2.13e+04,-0.000,1.000,-4.18e+04,4.18e+04


### Upshot: No meeting has a significant gender effect

In [110]:
meetings['anySpeaker'] = meetings.speaker >= 1
meetings['anySpeaker'] = meetings['anySpeaker'].astype(int)
usmeetings['anySpeaker'] = usmeetings.speaker >= 1
usmeetings['anySpeaker'] = usmeetings['anySpeaker'].astype(int)

meetings['anyLeadership'] = meetings.leadership >= 1
meetings['anyLeadership'] = meetings['anyLeadership'].astype(int)

usmeetings['anyLeadership'] = usmeetings.leadership >= 1
usmeetings['anyLeadership'] = usmeetings['anyLeadership'].astype(int)




meetings['yearsFromGraduation'] = 2018-meetings.medGradYear
usmeetings['yearsFromGraduation'] = 2018-usmeetings.medGradYear

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = va

In [111]:
meetings['professionalRank'] = meetings.professionalRank.str.strip()
usmeetings['professionalRank'] = usmeetings.professionalRank.str.strip()

def getSimpleRank(x):
    if isinstance(x, str):
        if "associate" in x.lower():
            return "associate"
        elif "assistant" in x.lower():
            return "assistant"
        elif ("professor" in x.lower()) and ("associate" not in x.lower()) and ("assistant" not in x.lower()):
            return "professor"
        else:
            return "other"    
    else:
        return "no rank"

meetings['simpleRank'] = meetings.professionalRank.apply(getSimpleRank)
usmeetings['simpleRank'] = usmeetings.professionalRank.apply(getSimpleRank)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [112]:
aan = meetings.loc[meetings.meeting=='AAN']

In [107]:
unadjustedSpeakerModel = smf.logit(formula='anySpeaker ~ female ', data=aan)
unadjustedSpeakerResult = unadjustedSpeakerModel.fit()
unadjustedSpeakerResult.summary()

Optimization terminated successfully.
         Current function value: 0.422913
         Iterations 6


0,1,2,3
Dep. Variable:,anySpeaker,No. Observations:,512.0
Model:,Logit,Df Residuals:,510.0
Method:,MLE,Df Model:,1.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.001117
Time:,09:40:07,Log-Likelihood:,-216.53
converged:,True,LL-Null:,-216.77
,,LLR p-value:,0.4866

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.7918,0.153,11.730,0.000,1.492,2.091
female,-0.1823,0.260,-0.700,0.484,-0.693,0.328


In [105]:
speakerModel = smf.logit(formula='anySpeaker ~ female +C(simpleRank) + yearsFromGraduation + pubsScopus + scopusHIndex + academic', data=aan)
speakerResult = speakerModel.fit()
speakerResult.summary()

         Current function value: 0.410414
         Iterations: 35




0,1,2,3
Dep. Variable:,anySpeaker,No. Observations:,495.0
Model:,Logit,Df Residuals:,485.0
Method:,MLE,Df Model:,9.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.02707
Time:,09:37:44,Log-Likelihood:,-203.15
converged:,False,LL-Null:,-208.81
,,LLR p-value:,0.2554

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2.2780,1.106,2.059,0.039,0.110,4.446
C(simpleRank)[T.associate],0.6667,0.419,1.592,0.111,-0.154,1.488
C(simpleRank)[T.no rank],-0.7462,0.936,-0.798,0.425,-2.580,1.088
C(simpleRank)[T.other],17.7535,7087.815,0.003,0.998,-1.39e+04,1.39e+04
C(simpleRank)[T.professor],0.2769,0.459,0.604,0.546,-0.622,1.176
female,-0.2894,0.281,-1.030,0.303,-0.840,0.261
yearsFromGraduation,-0.0222,0.016,-1.364,0.173,-0.054,0.010
pubsScopus,0.0011,0.001,0.759,0.448,-0.002,0.004
scopusHIndex,-0.0007,0.006,-0.105,0.916,-0.013,0.012


In [106]:
speakerModelPoisson = smf.poisson(formula='speaker ~ female +C(simpleRank) + yearsFromGraduation + pubsScopus + scopusHIndex + academic', data=aan)
speakerResultPoisson = speakerModelPoisson.fit()
speakerResultPoisson.summary()

Optimization terminated successfully.
         Current function value: 1.462402
         Iterations 5


0,1,2,3
Dep. Variable:,speaker,No. Observations:,495.0
Model:,Poisson,Df Residuals:,485.0
Method:,MLE,Df Model:,9.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.01343
Time:,09:37:55,Log-Likelihood:,-723.89
converged:,True,LL-Null:,-733.74
,,LLR p-value:,0.01978

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.2840,0.528,0.537,0.591,-0.752,1.320
C(simpleRank)[T.associate],0.2970,0.118,2.517,0.012,0.066,0.528
C(simpleRank)[T.no rank],-0.2759,0.460,-0.599,0.549,-1.178,0.626
C(simpleRank)[T.other],0.6256,0.314,1.992,0.046,0.010,1.241
C(simpleRank)[T.professor],0.2325,0.145,1.606,0.108,-0.051,0.516
female,0.0799,0.083,0.960,0.337,-0.083,0.243
yearsFromGraduation,-0.0052,0.005,-1.004,0.315,-0.015,0.005
pubsScopus,0.0008,0.000,1.954,0.051,-2.64e-06,0.002
scopusHIndex,-0.0009,0.002,-0.407,0.684,-0.005,0.004


### Upshot — no difference in # of talks or prob of talks at AAN

In [113]:
leadershipModel = smf.logit(formula='anyLeadership ~ female +C(simpleRank) + yearsFromGraduation + pubsScopus + scopusHIndex + academic', data=aan)
leadershipResult = leadershipModel.fit()
leadershipResult.summary()

Optimization terminated successfully.
         Current function value: 0.686378
         Iterations 5


0,1,2,3
Dep. Variable:,anyLeadership,No. Observations:,495.0
Model:,Logit,Df Residuals:,485.0
Method:,MLE,Df Model:,9.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.009413
Time:,09:42:44,Log-Likelihood:,-339.76
converged:,True,LL-Null:,-342.99
,,LLR p-value:,0.6934

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.6752,0.997,-0.677,0.498,-2.630,1.280
C(simpleRank)[T.associate],0.3878,0.276,1.403,0.161,-0.154,0.929
C(simpleRank)[T.no rank],0.2153,0.855,0.252,0.801,-1.461,1.892
C(simpleRank)[T.other],-1.0609,1.137,-0.933,0.351,-3.290,1.168
C(simpleRank)[T.professor],0.0865,0.337,0.257,0.797,-0.573,0.746
female,0.1124,0.201,0.558,0.577,-0.282,0.507
yearsFromGraduation,0.0105,0.012,0.865,0.387,-0.013,0.034
pubsScopus,-0.0004,0.001,-0.367,0.714,-0.002,0.002
scopusHIndex,0.0035,0.005,0.650,0.516,-0.007,0.014


In [114]:
leadershipResultPoisson = smf.poisson(formula='anyLeadership ~ female +C(simpleRank) + yearsFromGraduation + pubsScopus + scopusHIndex + academic', data=aan)
speakerResultPoisson = leadershipResultPoisson.fit()
speakerResultPoisson.summary()

Optimization terminated successfully.
         Current function value: 0.835248
         Iterations 6


0,1,2,3
Dep. Variable:,anyLeadership,No. Observations:,495.0
Model:,Poisson,Df Residuals:,485.0
Method:,MLE,Df Model:,9.0
Date:,"Fri, 30 Nov 2018",Pseudo R-squ.:,0.004173
Time:,09:43:43,Log-Likelihood:,-413.45
converged:,True,LL-Null:,-415.18
,,LLR p-value:,0.943

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.0418,0.711,-1.466,0.143,-2.435,0.351
C(simpleRank)[T.associate],0.2038,0.199,1.023,0.306,-0.187,0.594
C(simpleRank)[T.no rank],0.1157,0.607,0.191,0.849,-1.074,1.305
C(simpleRank)[T.other],-0.7419,1.011,-0.734,0.463,-2.723,1.239
C(simpleRank)[T.professor],0.0565,0.244,0.232,0.817,-0.421,0.534
female,0.0576,0.143,0.403,0.687,-0.223,0.338
yearsFromGraduation,0.0053,0.009,0.628,0.530,-0.011,0.022
pubsScopus,-0.0001,0.001,-0.216,0.829,-0.001,0.001
scopusHIndex,0.0014,0.003,0.475,0.635,-0.004,0.007
