In [125]:
from sklearn import preprocessing

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 50)

import pycountry_convert

import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.io as pio

init_notebook_mode(connected=True)

In [126]:
def iso2_to_countryname(iso2_code):
    iso2_code = iso2_code.lower()
    if iso2_code == 'uk':
        iso2_code = 'gb'
    if iso2_code == 'el':
        iso2_code = 'gr'
    if iso2_code == 'cs':
        iso2_code = 'rs'
    if iso2_code == 'xk':
        return 'Kosovo'
    if iso2_code == 'ko':
        return 'Kosovo'
    if iso2_code == 'an':
        return 'NETHERLANDS ANTILLES'
    return pycountry_convert.country_alpha2_to_country_name(iso2_code.upper())

# Data preparation

## Horizon Europe

In [127]:
he_projects = pd.read_csv('../data/raw/cordis-HORIZONprojects-csv/csv/project.csv', delimiter=';')
he_projects.head()

Unnamed: 0,id,acronym,status,title,startDate,endDate,totalCost,ecMaxContribution,legalBasis,topics,ecSignatureDate,frameworkProgramme,masterCall,subCall,fundingScheme,nature,objective,contentUpdateDate,rcn,grantDoi
0,101095387,AISym4MED,SIGNED,Synthetic and scalable data platform for medic...,2022-12-01,2026-11-30,6341765,6341765,HORIZON.2.1,HORIZON-HLTH-2022-IND-13-02,2022-11-22,HORIZON,HORIZON-HLTH-2022-IND-13,HORIZON-HLTH-2022-IND-13,RIA,,AISym4Med aims at developing a platform that w...,2022-12-30 17:42:45,244110,10.3030/101095387
1,101062897,IMPROVE,SIGNED,IMaging PROstate cancer using ViscoElastic bio...,2023-09-01,2025-08-31,0,18115296,HORIZON.1.2,HORIZON-MSCA-2021-PF-01-01,2022-06-30,HORIZON,HORIZON-MSCA-2021-PF-01,HORIZON-MSCA-2021-PF-01,MSCA-PF,,IMPROVE aims to demonstrate the potentiality o...,2022-08-26 02:24:17,240147,10.3030/101062897
2,101044731,SAINTBARTH,SIGNED,"Slavery, Abolition and Archipelagic Connection...",2024-01-01,2028-12-31,1992925,1992925,HORIZON.1.1,ERC-2021-COG,2022-07-11,HORIZON,ERC-2021-COG,ERC-2021-COG,HORIZON-AG,,Surveys and datasets of the trans-Atlantic sla...,2022-08-26 14:59:15,240177,10.3030/101044731
3,101065876,TEMP-MOMO,SIGNED,Drivers of spatiotemporal variation in tempera...,2022-09-01,2024-08-31,0,19591488,HORIZON.1.2,HORIZON-MSCA-2021-PF-01-01,2022-07-13,HORIZON,HORIZON-MSCA-2021-PF-01,HORIZON-MSCA-2021-PF-01,HORIZON-AG-UN,,Europe emerges as a major climate warming hots...,2022-08-26 15:30:46,240278,10.3030/101065876
4,101057389,CINDERELLA,SIGNED,Clinical Validation of an AI-based approach to...,2022-06-01,2026-05-31,5379050,5378299,HORIZON.2.1,HORIZON-HLTH-2021-DISEASE-04-04,2022-05-23,HORIZON,HORIZON-HLTH-2021-DISEASE-04,HORIZON-HLTH-2021-DISEASE-04,RIA,,Breast cancer is the most commonly diagnosed c...,2022-09-04 13:43:33,241461,10.3030/101057389


In [128]:
he_organisations = pd.read_csv('../data/raw/cordis-HORIZONprojects-csv/csv/organization.csv', delimiter=';')
he_organisations.head()

Unnamed: 0,projectID,projectAcronym,organisationID,vatNumber,name,shortName,SME,activityType,street,postCode,city,country,nutsCode,geolocation,organizationURL,contactForm,contentUpdateDate,rcn,order,role,ecContribution,netEcContribution,totalCost,endOfParticipation,active
0,101095387,AISym4MED,908114874,BE0692797754,ZABALA BRUSSELS,ZABALA BRUSSELS,True,PRC,RUE BELLIARD 20,1040,BRUXELLES,BE,,"50.840092,4.3814743",,https://ec.europa.eu/info/funding-tenders/oppo...,2022-12-30 17:42:45,1954294,5,participant,464375.0,464375.0,464375.0,False,
1,101095387,AISym4MED,900973055,FI29362646,SAIDOT OY,SAIDOT,True,PRC,TEKNIIKANTIE 2,02150,Espoo,FI,,"60.1668875,24.7488936",,https://ec.europa.eu/info/funding-tenders/oppo...,2022-12-30 17:42:45,1965161,11,participant,548750.0,548750.0,548750.0,False,
2,101095387,AISym4MED,991228063,BE0890217005,TIME.LEX,time.lex,True,PRC,JOSEPH STEVENSSTRAAT 7,1000,BRUSSEL,BE,,"50.8413225,4.3524748",,https://ec.europa.eu/info/funding-tenders/oppo...,2022-12-30 17:42:45,2349568,7,participant,417187.5,417187.5,4171875.0,False,
3,101095387,AISym4MED,999993468,GB649926678,IMPERIAL COLLEGE OF SCIENCE TECHNOLOGY AND MED...,Imperial,False,HES,SOUTH KENSINGTON CAMPUS EXHIBITION ROAD,SW7 2AZ,LONDON,UK,,"51.5073219,-0.1276473",http://www.imperial.ac.uk,https://ec.europa.eu/info/funding-tenders/oppo...,2022-12-30 17:42:45,1905566,14,associatedPartner,,0.0,,False,
4,101095387,AISym4MED,974142483,ESA50086412,INSTRUMENTACION Y COMPONENTES SA,,False,PRC,CALLE ALAUN 8 PLA ZA,50197,ZARAGOZA,ES,,"41.6410997,-0.9956188",www.inycom.es,https://ec.europa.eu/info/funding-tenders/oppo...,2022-12-30 17:42:45,2054577,2,participant,1205000.0,1205000.0,1205000.0,False,


## H2020

In [129]:
h2020_projects = pd.read_csv('../data/raw/cordis-h2020projects-csv/csv/project.csv', delimiter=';')
h2020_projects.head()

Unnamed: 0,id,acronym,status,title,startDate,endDate,totalCost,ecMaxContribution,legalBasis,topics,ecSignatureDate,frameworkProgramme,masterCall,subCall,fundingScheme,nature,objective,contentUpdateDate,rcn,grantDoi
0,101015557,SOAR,SIGNED,Supporting Open Access for Researchers and Pro...,2020-06-01,2023-05-31,299930,299930,H2020-EU.5.e.,IBA-SWAFS-PlanS-2020,2020-12-11,H2020,H2020-IBA-SWAFS-PlanS-2020,H2020-IBA-SWAFS-PlanS-2020,CSA,,Recognizing their duty as research funders and...,2022-09-08 19:31:35,232717,10.3030/101015557
1,682529,PLANTCULT,CLOSED,Identifying the food cultures of ancient Europ...,2016-04-01,2022-03-31,1891875,1891875,,ERC-CoG-2015,2016-03-16,H2020,ERC-2015-CoG,ERC-2015-CoG,ERC-COG,,The project seeks to explore culinary practice...,2023-03-10 14:08:03,202606,10.3030/682529
2,640213,TRA VISIONS 2016,CLOSED,TRA VISIONS 2016,2014-12-01,2016-11-30,4999375,4999375,H2020-EU.3.4.,MG-9.7-2014,2014-11-20,H2020,H2020-MG-2014-2015,H2020-MG-2014_SingleStage_A,CSA,,The concept of this project is to organise two...,2022-09-04 00:18:23,193701,10.3030/640213
3,101017207,DICE,SIGNED,Data Infrastructure Capacity for EOSC,2021-01-01,2023-06-30,6997706,6997706,H2020-EU.1.4.,INFRAEOSC-07-2020,2020-12-03,H2020,H2020-INFRAEOSC-2018-2020,H2020-INFRAEOSC-2020-2,RIA,,The Data Infrastructure Capacities for EOSC (D...,2022-10-11 18:22:53,232561,10.3030/101017207
4,825694,MICROB-PREDICT,SIGNED,MICROBiome-based biomarkers to PREDICT decompe...,2019-01-01,2025-03-31,150000025,15000000,H2020-EU.3.1.,SC1-BHC-03-2018,2018-12-03,H2020,H2020-SC1-BHC-2018-2020,H2020-SC1-2018-Single-Stage-RTD,RIA,,Decompensation of liver cirrhosis and progress...,2023-02-21 15:17:05,220953,10.3030/825694


In [130]:
h2020_organisations = pd.read_csv('../data/raw/cordis-h2020projects-csv/csv/organization.csv', delimiter=';')
h2020_organisations.head()

Unnamed: 0,projectID,projectAcronym,organisationID,vatNumber,name,shortName,SME,activityType,street,postCode,city,country,nutsCode,geolocation,organizationURL,contactForm,contentUpdateDate,rcn,order,role,ecContribution,netEcContribution,totalCost,endOfParticipation,active
0,101000002,Access2EIC,917116668,IL500501960,NATIONAL TECHNOLOGICAL INNOVATION AUTHORITY,Israel Innovation Authority,False,PUB,2 ND DERECH AGUDAT SPORT HAPOEL BUILDING 23 - ...,6812511,Jerusalem,IL,,"32.0653393,34.7649233",,https://ec.europa.eu/info/funding-tenders/oppo...,2022-09-08 19:15:19,3146617,5,participant,35250.0,35250.0,35250,False,
1,101000002,Access2EIC,951533044,SK2020798395,CENTRUM VEDECKO TECHNICKYCH INFORMACII SLOVENS...,CVTISR,False,PUB,LAMACSKA CESTA 8 A,811 04,Bratislava,SK,,"48.1747912,17.0654592",www.cvtisr.sk,https://ec.europa.eu/info/funding-tenders/oppo...,2022-09-08 19:15:19,2210750,13,participant,27150.0,27150.0,27150,False,
2,101000002,Access2EIC,966064323,FR27320252489,BPIFRANCE,BPIFRANCE,False,PRC,AVENUE DU GENERAL LECLERC 27-31,94710,MAISON ALFORT,FR,,"48.8148757,2.4235448",http://www.oseo.fr,https://ec.europa.eu/info/funding-tenders/oppo...,2022-09-08 19:15:19,1908958,8,participant,130375.0,130375.0,130375,False,
3,101000002,Access2EIC,996614570,NO986399445MVA,Innovasjon Norge,Innovasjon Norge,False,OTH,Akersgata 13,0104,Oslo,NO,,"59.9112341,10.7392022",http://www.innovasjonnorge.no,https://ec.europa.eu/info/funding-tenders/oppo...,2022-09-08 19:15:19,2203397,7,participant,60300.0,60300.0,60300,False,
4,101000002,Access2EIC,999419034,BG130534477,GIS-TRANSFERCENTER FOUNDATION,GIS-TC,False,OTH,ACAD G BONCHEV ST BLOCK 4,1113,SOFIA,BG,,"42.6977211,23.3225964",www.gis-tc.org,https://ec.europa.eu/info/funding-tenders/oppo...,2022-09-08 19:15:19,1908968,12,participant,35125.0,35125.0,35125,False,


## FP7

In [131]:
fp7_projects = pd.read_csv('../data/raw/cordis-fp7projects-csv/csv/project.csv', delimiter=';')
fp7_projects.head()

Unnamed: 0,id,acronym,status,title,startDate,endDate,totalCost,ecMaxContribution,legalBasis,topics,ecSignatureDate,frameworkProgramme,masterCall,subCall,fundingScheme,nature,objective,contentUpdateDate,rcn,grantDoi
0,314743,WINN,CLO,European Platform Driving KnoWledge to INNovat...,2012-10-01,2015-03-31,114742305,1023117,FP7-TRANSPORT,GC.SST.2012.3-3.,,FP7,,FP7-SST-2012-RTD-1,CSA-CA,,European Platform Driving KnoWledge to INNovat...,2016-07-25 10:06:36,105756,
1,218355,CHEMXCHANGE,CLO,Development of a cost effective system for exc...,2009-01-01,2012-10-31,29731608,2226426,FP7-SME,SME-2,,FP7,,FP7-SME-2007-2,BSG-SME-AG,,This proposal addresses the great challenge fa...,2017-05-29 17:05:22,89532,
2,319179,M-FUTURE2013,CLO,ManuFuture View on Horizon 2020,2012-09-01,2014-01-31,48419348,444876,FP7-NMP,NMP.2012.4.0-4,,FP7,,FP7-NMP-2012-CSA-6,CSA-SA,,In order to increase the coordination efforts ...,2022-02-11 10:30:54,104759,
3,268116,WIRE2010,CLO,Week of Innovative Regions in Europe,2009-09-01,2010-12-31,632666,600000,FP7-REGIONS,REGIONS,,FP7,,FP7-Adhoc-2007-13,CSA-SA,,The Week of Innovative Regions in Europe (WIRE...,2017-05-29 21:09:56,96428,
4,611433,MED-Dialogue,CLO,PROMOTING ICT DIALOGUE AND COOPERATION WITH TH...,2014-01-01,2016-06-30,94808800,77718900,FP7-ICT,ICT-2013.10.3,,FP7,,FP7-ICT-2013-10,CSA,,In line with the new concept in Horizon 2020 w...,2017-04-22 03:05:07,189032,


In [132]:
fp7_organisations = pd.read_csv('../data/raw/cordis-fp7projects-csv/csv/organization.csv', delimiter=';')
fp7_organisations.head()


Columns (17,20) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,projectID,projectAcronym,organisationID,vatNumber,name,shortName,SME,activityType,street,postCode,city,country,nutsCode,geolocation,organizationURL,contactForm,contentUpdateDate,rcn,order,role,ecContribution,netEcContribution,totalCost,endOfParticipation,active
0,314743,WINN,999663183.0,PL7770020410,INSTYTUT LOGISTYKI I MAGAZYNOWANIA,ILiM,,REC,UL ESTKOWSKIEGO 6,61-755,POZNAN,PL,,"52.34864345,16.77593215258902",http://www.ilim.poznan.pl,https://ec.europa.eu/info/funding-tenders/oppo...,2016-07-25 10:06:36,1913608,4,participant,116202.0,,,False,
1,314743,WINN,957164476.0,ESG99187494,CENTRO NACIONAL DE COMPETENCIA EN LOGISTICA IN...,CNC-LOGISTICA,,OTH,"EDIFICIO NAYADE , BLOQUE 5, CALLE BARI 55",50197,ZARAGOZA,ES,,"41.6521342,-0.8809427",www.cnc-logistica.org,https://ec.europa.eu/info/funding-tenders/oppo...,2016-07-25 10:06:36,1947334,1,coordinator,255811.0,,,False,
2,314743,WINN,973028341.0,IT03002431207,BLUEGREEN STRATEGY SRL,BLUEGREEN,,PRC,VIA ISONZO 59/2,40033,CASALECCHIO DI RENO BO,IT,,"44.4867184,11.2696976",www.bluegreenstrategy.com,https://ec.europa.eu/info/funding-tenders/oppo...,2016-07-25 10:06:36,1913614,6,participant,74070.75,,,False,
3,314743,WINN,959628470.0,BE0840574977,ALLIANCE FOR LOGISTICS INNOVATION THROUGH COLL...,ALICE,,REC,AVENUE JACQUES BREL 38/0,1200,BRUXELLES,BE,,"50.8465565,4.351697",,https://ec.europa.eu/info/funding-tenders/oppo...,2016-07-25 10:06:36,1947336,3,participant,107654.0,,,False,
4,314743,WINN,970780463.0,NL821037055B01,STICHTING DUTCH INSTITUTE FOR ADVANCED LOGISTICS,DINALOG,,REC,GRAAF ENGELBERTLAAN 75 2E VERDIEPING,4837 DS,BREDA,NL,,"51.5752505,4.7300273",www.dinalog.nl,https://ec.europa.eu/info/funding-tenders/oppo...,2016-07-25 10:06:36,1913162,2,participant,313411.0,,,False,


## FP6

In [133]:
fp6_projects = pd.read_csv('../data/raw/cordis-fp6projects.csv', delimiter=';')
fp6_projects.head()

Unnamed: 0,rcn,id,acronym,status,programme,topics,frameworkProgramme,title,startDate,endDate,projectUrl,objective,totalCost,ecMaxContribution,call,fundingScheme,coordinator,coordinatorCountry,participants,participantCountries,subjects
0,72765,4265,COREGRID,,FP6-IST,IST-2002-2.3.2.8,FP6,"European research network on foundations, soft...",2004-09-01,2008-08-31,http://www.coregrid.net/,CoreGRID aims at strengthening and advancing s...,,8200000,,NoE,GEIE ERCIM,FR,UNIVERSITE HENRI POINCARE NANCY 1;UNIVERSITE J...,FR;UK;SE;HU;BE;IT;DE;FI;CY;CZ;NL;ES;BG;PL;CL;A...,
1,79369,034084,SELFMAN,,FP6-IST,IST-2005-2.5.5,FP6,Self Management for large-scale distributed sy...,2006-06-01,2009-05-31,http://www.ist-selfman.org/wiki/index.php/self...,The goal of SELFMAN is to make large-scale dis...,2574259.0,1960000,,STREP,UNIVERSITE CATHOLIQUE DE LOUVAIN,BE,KONRAD-ZUSE-ZENTRUM FUER INFORMATIONSTECHNIK B...,DE;FR;SE;SG,
2,80458,517675-2,MICROTRAP,,FP6-IST,IST-2004-2.3.4.1 b7,FP6,Development of a pan-European Microtrap Techno...,2006-04-01,2009-03-31,,MICROTRAP is a Strep proposal aimed at develop...,2043500.0,1769000,,STREP,NPL MANAGEMENT LIMITED,UK,"UNIVERSITAET INNSBRUCK;THE CHANCELLOR, MASTERS...",AT,
3,78376,027305,NOBEL PHASE 2,,FP6-IST,IST-2004-2.4.4,FP6,Next generation optical network for Broadband ...,2006-03-01,2008-02-29,http://www.ist-nobel.org/,To achieve the strategic goal of broadband for...,21237629.0,12109996,,IP,TELECOM ITALIA SPA,IT,CORECOM - CONSORZIO RICERCHE ELABORAZIONE COMM...,IT;DE,
4,75263,502885,MERSEA,,FP6-AEROSPACE,AERO-2.3,FP6,Marine Environment and security for the Europe...,2004-04-01,2008-09-30,,Marine Environment and security for the Europe...,24320809.0,13997699,FP6-2002-SPACE-1,IP,INSTITUT FRANCAIS DE RECHERCHE POUR L'EXPLOITA...,FR,EUROPEAN CENTRE FOR MEDIUM-RANGE WEATHER FOREC...,UK;IE;CY;NL;IT;ES;DE;FR;FI;NO;CA;BE;EL;TR;DK,


In [134]:
fp6_organisations = pd.read_csv('../data/raw/cordis-fp6organizations.csv', delimiter='\t')
fp6_organisations.head()


Columns (1,15) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,projectRcn,projectID,projectAcronym,role,id,name,shortName,activityType,endOfParticipation,ecContribution,country,street,city,postCode,organizationUrl,vatNumber,contactForm,contactType,contactTitle,contactFirstNames,contactLastNames,contactFunction,contactTelephoneNumber,contactFaxNumber
0,72765,4265,COREGRID,participant,,UNIVERSITE HENRI POINCARE NANCY 1,,,,,FR,Rue Lionnois 24-30,NANCY,BP 60120,http://www.uhp-nancy.fr,,,,,,,,,
1,72765,4265,COREGRID,participant,,UNIVERSITE JOSEPH FOURIER GRENOBLE 1,,,,,FR,621 AVENUE CENTRALE - DOMAINE UNIVERSITAIRE,GRENOBLE,38041,,,,,,,,,,
2,72765,4265,COREGRID,participant,,THE QUEEN'S UNIVERSITY OF BELFAST,,,,,UK,UNIVERSITY ROAD,BELFAST,BT7 1NN,,,,,,,,,,
3,72765,4265,COREGRID,participant,,"SICS, SWEDISH INSTITUTE OF COMPUTER SCIENCE AB",SICS,,,,SE,ISAFJORDSGATAN 22,KISTA,164 29,,,,,,,,,,
4,72765,4265,COREGRID,participant,,MAGYAR TUDOMANYOS AKADEMIA SZAMITASTECHNIKAI E...,,,,,HU,KENDE U. 13-17,BUDAPEST,1111,http://WWW.SZTAKI.HU,,,,,,,,,


## FP5

In [135]:
fp5_projects = pd.read_csv('../data/raw/cordis-fp5projects.csv', delimiter=';')
fp5_projects.head()

Unnamed: 0,rcn,id,acronym,status,programme,topics,frameworkProgramme,title,startDate,endDate,projectUrl,objective,totalCost,ecMaxContribution,call,fundingScheme,coordinator,coordinatorCountry,participants,participantCountries,subjects
0,67894,HPRN-CT-2002-00284,SMART SYSTEMS,,FP5-HUMAN POTENTIAL,1.4.1.-1.1.,FP5,"New materials, adaptive systems and their nonl...",2002-11-01,2006-10-31,,The project aims at providing training of rese...,1500000.0,1500000.0,,NET,CHAMBRE DE COMMERCE ET D'INDUSTRIE DE PARIS,FR,UNIVERSIDADE DE SANTIAGO DE COMPOSTELA;INSTITU...,ES;FR;PT;EL;IT;PL;DE,
1,61178,HPRI-CT-1999-50007,ARMS,,FP5-HUMAN POTENTIAL,1.4.1.-2.,FP5,Advanced research magnet systems,2000-03-01,2003-06-01,,Description:\n\nA pulsed magnet station will b...,1200000.0,1200000.0,,CSC,"THE CHANCELLOR, MASTERS AND SCHOLARS OF THE UN...",UK,CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE;K...,FR;BE;NL;DE,
2,61341,NNE5/363/2001,TDT-3R MULTI FUEL,,FP5-EESD,1.1.4.-5.1.1,FP5,Multi Fuel Operated Integrated Clean Energy Pr...,2002-08-01,2005-07-31,http://www.3ragrocarbon.com,Develop efficient energy conversion process wi...,2248440.0,1451390.0,,,"TERRA HUMANA CLEAN TECHNOLOGY DEVELOPMENT, ENG...",HU,ENERGY RESEARCH CENTRE OF THE NETHERLANDS;ROST...,NL;DE;EL;HU;LV;UK,
3,63349,G7RT-CT-2002-05086,,,FP5-GROWTH,1.1.3.-7.,FP5,European virtual institute for geometry measur...,2002-07-01,2006-12-31,,The Virtual Institute EVIGeM consists of a den...,2394417.0,2236532.0,,THN,BREMER INSTITUT FUER BETRIEBSTECHNIK UND ANGEW...,DE,DIN CERTCO GESELLSCHAFT FUER KONFORMITAETSBEWE...,DE;CZ;UK;CH;IT;EL;DK;ES;AT;BE;NL;PT;NO,
4,61025,IST-2001-32672,WOMAN II,,FP5-IST,2001-1.1.3,FP5,European Network of Services for Women Health ...,2001-09-01,2003-02-28,http://www.womanlab.com,"WOMAN, an EC funded project (1998-2000) create...",1090513.0,774004.0,,ACM,DIPARTIMENTO DI MEDICINA DELLA PROCREAZIONE E ...,IT,UNIVERSITY OF LEICESTER;INSTITUTO PALACIOS DE ...,UK;ES;PL;NL;IT;PT;IL;CH;DE;FR;SE;MT;BE,


In [136]:
fp5_organisations = pd.read_csv('../data/raw/cordis-fp5organizations.csv', delimiter='\t')
fp5_organisations.head()


Columns (7,15) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,projectRcn,projectID,projectAcronym,role,id,name,shortName,activityType,endOfParticipation,ecContribution,country,street,city,postCode,organizationUrl,vatNumber,contactForm,contactType,contactTitle,contactFirstNames,contactLastNames,contactFunction,contactTelephoneNumber,contactFaxNumber
0,67894,HPRN-CT-2002-00284,SMART SYSTEMS,participant,,UNIVERSIDADE DE SANTIAGO DE COMPOSTELA,,,,,ES,Campus Universitario Sur,SANTIAGO DE COMPOSTELA,15782,,,,,,,,,,
1,67894,HPRN-CT-2002-00284,SMART SYSTEMS,participant,,INSTITUT NATIONAL DE RECHERCHE EN INFORMATIQUE...,,,,,FR,,LE CHESNAY,78153,,,,,,,,,,
2,67894,HPRN-CT-2002-00284,SMART SYSTEMS,participant,,INSTITUTO SUPERIOR TECNICO,,,,,PT,Avenida Rovisco Pais 1,LISBOA,1049-001,,,,,,,,,,
3,67894,HPRN-CT-2002-00284,SMART SYSTEMS,participant,,ARISTOTLE UNIVERSITY OF THESSALONIKI,,,,,EL,"University Campus, Egnatia Street, Administration",THESSALONIKI,54006,,,,,,,,,,
4,67894,HPRN-CT-2002-00284,SMART SYSTEMS,participant,,NATIONAL RESEARCH COUNCIL OF ITALY,,,,,IT,Via Ferrata 1,PAVIA,27100,,,,,,,,,,


## FP4

In [137]:
fp4_projects = pd.read_csv('../data/raw/cordis-fp4projects.csv', delimiter=';')
fp4_projects.head()

Unnamed: 0,rcn,id,acronym,status,programme,topics,frameworkProgramme,title,startDate,endDate,projectUrl,objective,totalCost,ecMaxContribution,call,fundingScheme,coordinator,coordinatorCountry,participants,participantCountries,subjects
0,44889,JOR3980231,,,FP4-NNE-JOULE C,3010103.0,FP4,Development of a low cost European desalinatio...,1998-08-01,2001-01-31,http://www.eurotrough.com,\n1. Detailed EUROTrough design including proc...,,,,CSC,INSTALACIONES ABENGOA SA,ES,"CENTRO DE INVESTIGACIONES ENERGETICAS, MEDIAMB...",ES;DE;EL,
1,43710,IC20970046,,,FP4-INCO,1.0,FP4,Predrying of moist fuels for power production,1998-01-01,2000-12-31,,"\n\nIn various EU and East European countries,...",,,,CSC,UNIVERSITAET STUTTGART,DE,Technical University of Timisoara;Technical Un...,RO;PL;BG,
2,48019,IC21980401,,,FP4-INCO,,FP4,Integration of renewable energies in the South...,1998-11-01,2000-10-31,,\n\nIt is generally recognised that renewable ...,,,,CSC,OBSERVATOIRE MEDITERRANEEN DE L'ENERGIE,FR,SOCIETE TUNISIENNE DE L'ELECTRICITE ET DU GAZ;...,TN;EG;PS;MA;DZ;JO;TR,
3,35176,BRPR960224,MICROMOD-SX,,FP4-BRITE/EURAM 3,201.0,FP4,Predictive Microstructural Assessment and Micr...,1997-01-01,2000-12-31,,\n\nEuropean gas turbine manufacturers have a ...,,,,CSC,Ruhr-Universität Bochum,DE,INSTITUTE OF MECHANICS OF MATERIALS AND GEOSTR...,EL;FR;IT;UK;DE,
4,37158,JOR3960099,,,FP4-NNE-JOULE C,305.0,FP4,A novel approach for the integration of biomas...,1996-10-01,1998-09-30,,\n\nObjectives \n\nThe proposed integrated app...,,,,CSC,AGRICULTURAL UNIVERSITY OF ATHENS,EL,CENTRE FOR RENEWABLE ENERGY SOURCES;Hellenic A...,EL;AT;DE;IT,


In [138]:
fp4_organisations = pd.read_csv('../data/raw/cordis-fp4organizations.csv', delimiter='\t')
fp4_organisations.head()


Columns (7,15) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,projectRcn,projectID,projectAcronym,role,id,name,shortName,activityType,endOfParticipation,ecContribution,country,street,city,postCode,organizationUrl,vatNumber,contactForm,contactType,contactTitle,contactFirstNames,contactLastNames,contactFunction,contactTelephoneNumber,contactFaxNumber
0,44889,JOR3980231,,participant,,"CENTRO DE INVESTIGACIONES ENERGETICAS, MEDIAMB...",,,,,ES,"22,Avenida Complutense 22",MADRID,28040,,,,,,,,,,
1,44889,JOR3980231,,participant,,GERMAN AEROSPACE CENTRE,,,,,DE,Linder Höhe,KOELN,51147,http://www.dlr.de/et,,,,,,,,,
2,44889,JOR3980231,,participant,,SBP GMBH,,,,,DE,Hohenzollernstrasse 1,STUTTGART,70178,,,,,,,,,,
3,44889,JOR3980231,,coordinator,,INSTALACIONES ABENGOA SA,,,,,ES,C/ Manuel Velasco Pando 7,SEVILLA,41007,,,,,,,,,,
4,44889,JOR3980231,,participant,,Centre for Renewable Energy Sources,,,,,EL,"KM 19TH,Marathonos Avenue",Pikermi Attiki,19009,,,,,,,,,,


## FP3

In [139]:
fp3_projects = pd.read_csv('../data/raw/cordis-fp3projects.csv', delimiter=';')
fp3_projects.head()

Unnamed: 0,rcn,id,acronym,status,programme,topics,frameworkProgramme,title,startDate,endDate,projectUrl,objective,totalCost,ecMaxContribution,call,fundingScheme,coordinator,coordinatorCountry,participants,participantCountries,subjects
0,18045,JOU20418,EUCLIDES,,FP3-JOULE 2,304.0,FP3,European concentrated light intensity developm...,1994-01-01,1995-12-31,,The goal of this project is design all the com...,,,,CSC,BP SOLAR LTD,UK,UNIVERSIDAD POLITECNICA DE MADRID;UNIVERSITY O...,ES;UK;DE,
1,8957,6549,MOSAIC,,FP3-ESPRIT 3,,FP3,Monolithic and Hybrid Optoelectronic Smart-Ass...,1992-04-01,1995-03-31,,In order to demonstrate the functionality of t...,,,,,Alcatel Alsthom Recherche,FR,EIDGENOESSISCHE TECHNISCHE HOCHSCHULE;Commissa...,CH;FR;DE;BE;DK;UK,
2,6120,BIO2930001,CARBBANK,,FP3-BIOTECH 1,4.2,FP3,A complex carbohydrate structural database,1993-11-01,1996-04-30,,The CarbBank project has been an international...,,256000.0,,CSC,CARLSBERG LABORATORY,DK,UTRECHT UNIVERSITY;UNIVERSITY OF HAMBURG,NL;DE,
3,6207,BIO2930400,,,FP3-BIOTECH 1,2.1,FP3,Plant molecular genetics for an environmentall...,1993-11-01,1997-08-31,,The EU Biotechnology Programme (1992-94) work ...,,18840710.0,,CSC,JOHN INNES CENTRE,UK,"Ente per le Nuove Tecnologie, l'Energia e l'Am...",IT;CH;FR;UK;BE;ES;EL;NL;DK;DE;PT;IE,
4,5879,BIO2930075,ESSA,,FP3-BIOTECH 1,1.2,FP3,European scientists sequencing Arabidopsis,1993-09-01,1997-02-28,,The aim of this project was to initiate the se...,,1022957.0,,CSC,JOHN INNES CENTRE,UK,"THE PROVOST, FELLOWS AND SCHOLARS OF THE COLLE...",IE;FR;BE;ES;NL;DE;UK,


In [140]:
fp3_organisations = pd.read_csv('../data/raw/cordis-fp3organizations.csv', delimiter='\t')
fp3_organisations.head()

Unnamed: 0,projectRcn,projectID,projectAcronym,role,id,name,shortName,activityType,endOfParticipation,ecContribution,country,street,city,postCode,organizationUrl,vatNumber,contactForm,contactType,contactTitle,contactFirstNames,contactLastNames,contactFunction,contactTelephoneNumber,contactFaxNumber
0,18045,JOU20418,EUCLIDES,coordinator,,BP SOLAR LTD,,,,,UK,CHERTSEY ROAD,SUNBURY-ON-THAMES,TW167XA,http://www.bpsolar.com,,,,,,,,,
1,18045,JOU20418,EUCLIDES,participant,,UNIVERSIDAD POLITECNICA DE MADRID,,,,,ES,Ciudad Universitaria - ETSI Telecomunicacion,MADRID,28040,,,,,,,,,,
2,18045,JOU20418,EUCLIDES,participant,,UNIVERSITY OF READING,,,,,UK,Whiteknights,Reading,RG6 6AY,,,,,,,,,,
3,18045,JOU20418,EUCLIDES,participant,,CENTER FOR SOLAR ENERGY AND HYDROGEN RESEARCH ...,,,,,DE,Hessbruehlstrasse 21c,STUTTGART,70565,,,,,,,,,,
4,8957,6549,MOSAIC,participant,,EIDGENOESSISCHE TECHNISCHE HOCHSCHULE,,,,,CH,ETH-ZENTRUM,ZURICH,8092,,,,,,,,,,


## FP2

In [141]:
fp2_projects = pd.read_csv('../data/raw/cordis-fp2projects.csv', delimiter=';')
fp2_projects.head()

Unnamed: 0,rcn,id,acronym,status,programme,topics,frameworkProgramme,title,startDate,endDate,projectUrl,objective,totalCost,ecMaxContribution,call,fundingScheme,coordinator,coordinatorCountry,participants,participantCountries,subjects
0,7639,AGRE0067,,,FP2-ECLAIR,,FP2,Upgrading the genetic quality of European hard...,1991-03-01,1995-02-28,,The objective of this project is to upgrade th...,,,,CSC,"Teagasc, Agriculture and Food Development Auth...",IE,Azienda Agricola Meristema Srl;Bonsai-Flora SA...,IT;ES;IE;NL;BE;DE;DK;FR,
1,135,80010118,,,FP2-CAMAR,,FP2,IMPROVING WATER VALORISATION IN SUNFLOWER CROP...,1992-01-01,1995-01-01,,\nProblems of drought are of major importance ...,,,,,Institut National de la Recherche Agronomique ...,FR,RUSTICA SEMENCES;Università degli Studi di Udi...,FR;IT;DE;ES,
2,2605,JOUR0067,,,FP2-JOULE 1,,FP2,Wind measurements and modelling in complex ter...,1990-09-01,1995-06-30,,To deeper knowledge of local wind characterist...,,,,CSC,RISOE NATIONAL LABORATORY,DK,UNIVERSITY COLLEGE DUBLIN;Centre for Renewable...,IE;EL;UK;ES;IT;FR;PT;FI;DE,
3,2594,JOUE0055,,,FP2-JOULE 1,,FP2,Replacement of CFC's in refrigeration systems,1990-11-01,1993-03-31,,To investigate a number of refrigerants on the...,,,,CSC,UNIVERSITY OF ULSTER,UK,Akzo International Research NV;IMPERIAL CHEMIC...,NL;UK;DE;IT,
4,2820,MA2M0012,,,FP2-MATREC C,,FP2,SILICA REMOVAL FROM BAUXITES,1991-03-01,1993-02-28,,The aim of the project is to develop a highly ...,,,,CSC,National Technical University of Athens,EL,Bauxite Parnasse Mining Company SA;PROGEMISA S...,EL;IT;UK,


In [142]:
fp2_organisations = pd.read_csv('../data/raw/cordis-fp2organizations.csv', delimiter='\t')
fp2_organisations.head()

Unnamed: 0,projectRcn,projectID,projectAcronym,role,id,name,shortName,activityType,endOfParticipation,ecContribution,country,street,city,postCode,organizationUrl,vatNumber,contactForm,contactType,contactTitle,contactFirstNames,contactLastNames,contactFunction,contactTelephoneNumber,contactFaxNumber
0,7639,AGRE0067,,coordinator,,"Teagasc, Agriculture and Food Development Auth...",,,,,IE,Malahide Road,Dublin,17,,,,,,,,,,
1,7639,AGRE0067,,participant,,Azienda Agricola Meristema Srl,,,,,IT,Via Martiri della Libertá 13,Cascine di Buti Pisa,56030,,,,,,,,,,
2,7639,AGRE0067,,participant,,Bonsai-Flora SAT 681 XUGA,,,,,ES,"7,Fondeadero de Crendes",Abegondo La Coruña,15318,,,,,,,,,,
3,7639,AGRE0067,,participant,,NATIONAL RESEARCH COUNCIL OF ITALY,,,,,IT,Via Atto Vannucci 13,FIRENZE,50134,,,,,,,,,,
4,7639,AGRE0067,,participant,,Coillte Teoranta,,,,,IE,"2,Sidmonton Place","Bray, County Wicklow",26,,,,,,,,,,


## FP1

In [143]:
fp1_projects = pd.read_csv('../data/raw/cordis-fp1projects.csv', delimiter=';')
fp1_projects.head()

Unnamed: 0,rcn,id,acronym,status,programme,topics,frameworkProgramme,title,startDate,endDate,projectUrl,objective,totalCost,ecMaxContribution,call,fundingScheme,coordinator,coordinatorCountry,participants,participantCountries,subjects
0,11643,MA1B0008,,,FP1-RAWMAT 3C,,FP1,A STUDY OF THE PARASITIC FUNGI OF THE GENUS AR...,1988-01-01,1989-12-31,,TO DEVELOP NEW METHODS OF IDENTIFICATION OF AR...,,,,CSC,Institut National de la Recherche Agronomique ...,FR,"THE CHANCELLOR, MASTERS AND SCHOLARS OF THE UN...",UK;FR;IT;DE,
1,11881,EV4V0111,EROS 2000,,FP1-ENVPROT 4C,,FP1,EUROPEAN RIVER OCEAN SYSTEM,1988-02-01,1990-05-31,,TO INVESTIGATE THE BIOGEOCHEMICAL INTERACTIONS...,,,,CSC,Centre National de la Recherche Scientifique (...,FR,University College Dublin;UNIVERSITY OF WALES ...,IE;UK;IT;ES;PT;DE;NL;DK;EL;BE,
2,10946,ST2*0172,,,FP1-STIMULATION 1C,,FP1,MOLECULAR GENETICS OF HUMAN ERYTHROCYTE GLUCOS...,1986-04-01,1989-03-31,,\n\nANALYZE THE ENTIRE STRUCTURE OF THE GD GEN...,,,,CSC,Royal Postgraduate Medical School,UK,NATIONAL RESEARCH COUNCIL OF ITALY,IT,
3,13795,BAP*0059,,,FP1-BAP,,FP1,CONTINUOUS SYNTHESIS OF FINE CHEMICALS BY COFA...,1986-07-01,1989-12-31,,THE LONG TERM OBJECTIVE OF THE JOINT PROJECT I...,,,,CSC,FRAUNHOFER-GESELLSCHAFT ZUR FOERDERUNG DER ANG...,DE,GBF - NATIONAL CENTRE FOR BIOTECHNOLOGY;Consig...,DE;IT,
4,13796,BAP*0060,,,FP1-BAP,,FP1,CONTINUOUS SYNTHESIS OF FINE CHEMICALS BY COFA...,1986-07-01,1990-12-31,,\nTo produce nicotinamide adenine dinucleotide...,,,,CSC,Gesellschaft für Biotechnologische Forschung mbH,DE,FRAUNHOFER-GESELLSCHAFT ZUR FOERDERUNG DER ANG...,DE;IT,


In [144]:
fp1_organisations = pd.read_csv('../data/raw/cordis-fp1organizations.csv', delimiter='\t')
fp1_organisations.head()

Unnamed: 0,projectRcn,projectID,projectAcronym,role,id,name,shortName,activityType,endOfParticipation,ecContribution,country,street,city,postCode,organizationUrl,vatNumber,contactForm,contactType,contactTitle,contactFirstNames,contactLastNames,contactFunction,contactTelephoneNumber,contactFaxNumber
0,11643,MA1B0008,,participant,,"THE CHANCELLOR, MASTERS AND SCHOLARS OF THE UN...",UOXF.CZ,,,,UK,"University Offices, Wellington Square",OXFORD,,http://www.ox.ac.uk,,,,,,,,,
1,11643,MA1B0008,,participant,,École Nationale des Ingénieurs des Travaux Agr...,,,,,FR,1 cours du Général de Gaulle,Gradignan,33170.0,,,,,,,,,,
2,11643,MA1B0008,,coordinator,,Institut National de la Recherche Agronomique ...,,,,,FR,147 rue de l'Université,Paris,75341.0,,,,,,,,,,
3,11643,MA1B0008,,participant,,Consiglio Nazionale delle Ricerche (CNR),,,,,IT,Piazzale delle Cascine 28,Firenze,50144.0,,,,,,,,,,
4,11643,MA1B0008,,participant,,Ludwig-Maximilians-Universität München,,,,,DE,Amalienstraße 52,München,80799.0,,,,,,,,,,


# Data analysis

In [145]:
fp7_organisations.ecContribution = pd.to_numeric(fp7_organisations.ecContribution, errors='coerce')

In [146]:
he_funds = he_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'rcn': 'count'})\
                    .sort_values(['ecContribution', 'rcn'], ascending=False)

h2020_funds = h2020_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'rcn': 'count'})\
                    .sort_values(['ecContribution', 'rcn'], ascending=False)

fp7_organisations.ecContribution = pd.to_numeric(fp7_organisations.ecContribution, errors='coerce')
fp7_funds = fp7_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'rcn': 'count'})\
                    .sort_values(['ecContribution', 'rcn'], ascending=False)

fp6_organisations.ecContribution = pd.to_numeric(fp6_organisations.ecContribution, errors='coerce')
fp6_funds = fp6_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'projectRcn': 'count'})\
                    .sort_values(['ecContribution', 'projectRcn'], ascending=False)

fp5_funds = fp5_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'projectRcn': 'count'})\
                    .sort_values(['ecContribution', 'projectRcn'], ascending=False)

fp4_funds = fp4_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'projectRcn': 'count'})\
                    .sort_values(['ecContribution', 'projectRcn'], ascending=False)

fp3_funds = fp3_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'projectRcn': 'count'})\
                    .sort_values(['ecContribution', 'projectRcn'], ascending=False)

fp2_funds = fp2_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'projectRcn': 'count'})\
                    .sort_values(['ecContribution', 'projectRcn'], ascending=False)

fp1_funds = fp1_organisations.groupby(['country'])\
                    .agg({'ecContribution': 'sum', 'projectRcn': 'count'})\
                    .sort_values(['ecContribution', 'projectRcn'], ascending=False)

In [147]:
data = he_funds.join(h2020_funds, rsuffix='_h2020', how='outer')
data = data.join(fp7_funds, rsuffix='_fp7', how='outer')
data = data.join(fp6_funds, rsuffix='_fp6', how='outer')
data = data.join(fp5_funds, rsuffix='_fp5', how='outer')
data = data.join(fp4_funds, rsuffix='_fp4', how='outer')
data = data.join(fp3_funds, rsuffix='_fp3', how='outer')
data = data.join(fp2_funds, rsuffix='_fp2', how='outer')
data = data.join(fp1_funds, rsuffix='_fp1', how='outer')
data

Unnamed: 0_level_0,ecContribution,rcn,ecContribution_h2020,rcn_h2020,ecContribution_fp7,rcn_fp7,ecContribution_fp6,projectRcn,ecContribution_fp5,projectRcn_fp5,ecContribution_fp4,projectRcn_fp4,ecContribution_fp3,projectRcn_fp3,ecContribution_fp2,projectRcn_fp2,ecContribution_fp1,projectRcn_fp1
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AD,,,,,,,,,0.0,1.0,,,,,,,,
AE,0.00,1.0,500000.00,4.0,370219.89,7.0,0.0,1.0,,,,,,,,,,
AF,120250.00,1.0,99237.99,2.0,16718.75,1.0,0.0,2.0,,,,,,,,,,
AI,0.00,1.0,1253137.26,5.0,,,,,,,,,,,,,,
AL,2261386.50,20.0,5589330.11,52.0,2268549.18,41.0,89100.0,40.0,0.0,8.0,0.0,8.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YE,,,134073.75,1.0,142309.55,2.0,,,,,,,,,,,,
YU,,,,,,,161840.0,5.0,,,,,,,,,,
ZA,23734251.41,83.0,61109539.47,312.0,34390202.55,245.0,0.0,137.0,0.0,67.0,0.0,44.0,0.0,1.0,,,,
ZM,1298953.00,8.0,1976161.63,13.0,1637946.40,10.0,0.0,12.0,0.0,12.0,0.0,13.0,0.0,6.0,,,,


In [148]:
data = data.fillna(0)

In [149]:
data[data.index == 'IT']

Unnamed: 0_level_0,ecContribution,rcn,ecContribution_h2020,rcn_h2020,ecContribution_fp7,rcn_fp7,ecContribution_fp6,projectRcn,ecContribution_fp5,projectRcn_fp5,ecContribution_fp4,projectRcn_fp4,ecContribution_fp3,projectRcn_fp3,ecContribution_fp2,projectRcn_fp2,ecContribution_fp1,projectRcn_fp1
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
IT,1670405000.0,3994.0,5676402000.0,17151.0,3549517000.0,12437.0,93755605.0,6673.0,0.0,8025.0,0.0,6450.0,0.0,3324.0,0.0,1922.0,0.0,817.0


In [150]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 206 entries, AD to ZW
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   ecContribution        206 non-null    float64
 1   rcn                   206 non-null    float64
 2   ecContribution_h2020  206 non-null    float64
 3   rcn_h2020             206 non-null    float64
 4   ecContribution_fp7    206 non-null    float64
 5   rcn_fp7               206 non-null    float64
 6   ecContribution_fp6    206 non-null    float64
 7   projectRcn            206 non-null    float64
 8   ecContribution_fp5    206 non-null    float64
 9   projectRcn_fp5        206 non-null    float64
 10  ecContribution_fp4    206 non-null    float64
 11  projectRcn_fp4        206 non-null    float64
 12  ecContribution_fp3    206 non-null    float64
 13  projectRcn_fp3        206 non-null    float64
 14  ecContribution_fp2    206 non-null    float64
 15  projectRcn_fp2        206 no

In [151]:
data['total_contribution'] = data['ecContribution']\
                            + data['ecContribution_h2020']\
                            + data['ecContribution_fp7']\
                            + data['ecContribution_fp6']\
                            + data['ecContribution_fp5']\
                            + data['ecContribution_fp4']\
                            + data['ecContribution_fp3']\
                            + data['ecContribution_fp2']\
                            + data['ecContribution_fp1']

data['total_projects'] = data['rcn']\
                            + data['rcn_h2020']\
                            + data['rcn_fp7']\
                            + data['projectRcn']\
                            + data['projectRcn_fp5']\
                            + data['projectRcn_fp4']\
                            + data['projectRcn_fp3']\
                            + data['projectRcn_fp2']\
                            + data['projectRcn_fp1']
data = data[['total_contribution', 'total_projects']]

In [152]:
min_max_scaler = preprocessing.MinMaxScaler((3, 100))

In [153]:
data = data.sort_values(['total_contribution', 'total_projects'], ascending=False).reset_index()
trace = go.Scatter(x=data.index,
                   y=data['total_contribution'],
                   mode='markers',
                   text=data['country'],
                   marker=dict(size=min_max_scaler.fit_transform(data[['total_projects']]))
                  )

layout = go.Layout(
    title='',
    xaxis=dict(type='log'),
    yaxis=dict(type='log')
)

fig = dict(data=[trace], layout=layout)
plotly.offline.iplot(fig)

In [154]:
data = data.sort_values(['total_projects', 'total_contribution'], ascending=False).reset_index()
trace = go.Scatter(x=data.index,
                   y=data['total_projects'],
                   mode='markers',
                   text=data['country'],
                   marker=dict(size=min_max_scaler.fit_transform(data[['total_contribution']]))
                  )

layout = go.Layout(
    title='',
    xaxis=dict(type='log'),
    yaxis=dict(type='log')
)

fig = dict(data=[trace], layout=layout)
plotly.offline.iplot(fig)