In [1]:
import warnings

# Suppress every warning raised from this point on, including those emitted
# while the libraries below are imported.
# NOTE(review): presumably aimed at an import-time warning from fuzzywuzzy;
# confirm, and consider narrowing the filter so unrelated warnings stay visible.
warnings.filterwarnings('ignore')

import pandas as pd
from pathlib import Path

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

In [2]:
# All inputs and the output live in a single directory under the user's home.
datadir = Path("~/Data/scholarships").expanduser()

# Input CSVs: the summary table and the per-scholarship detail table.
summary_scholarships = datadir.joinpath('scraped_data.csv')
detail_scholarships = datadir.joinpath('fullScholarshipInfo.csv')

# Destination for the merged table.
outfile = datadir.joinpath('mergedScholarshipInfo.csv')

In [3]:
# Show the resolved input paths, one per line, summary file first.
for csv_path in (summary_scholarships, detail_scholarships):
    print(csv_path.as_posix())

/home/damian/Data/scholarships/scraped_data.csv
/home/damian/Data/scholarships/fullScholarshipInfo.csv


In [4]:
# Load both CSVs with pandas' default parsing options: the summary file
# first, then the detail file.
df_summaryScholarships, df_detailScholarships = (
    pd.read_csv(csv_path.as_posix())
    for csv_path in (summary_scholarships, detail_scholarships)
)

In [5]:
# Preview the first rows of the summary table read from scraped_data.csv.
df_summaryScholarships.head()

Unnamed: 0,scholarshipID,scholarshipDetailURL,Award Name,Organization,Purposes,Level Of Study,Award Type,Award Amount,Deadline
0,9993981,https://www.careeronestop.org/toolkit/training...,NAWIC Construction Trades Scholarship,National Association of Women in Construction,To support women pursuing construction-related...,Bachelor's Degree Vocational,Scholarship,,February
1,9990581,https://www.careeronestop.org/toolkit/training...,PC Construction Scholarship,Vermont Student Assistance Corporation,To support a Vermont graduating senior pursuin...,Bachelor's Degree,Scholarship,"$2,500",February
2,9993165,https://www.careeronestop.org/toolkit/training...,Pence Construction Diversity in Leadership Sch...,Oregon Community Foundation - Portland,To support underrepresented students of color ...,Bachelor's Degree Vocational,Scholarship,,March
3,9997696,https://www.careeronestop.org/toolkit/training...,Carpenters' Company Scholarship Program,Carpenters' Company of the City and County of ...,"To provide architecture, structural engineerin...",Bachelor's Degree,Scholarship,"$5,000",
4,9992419,https://www.careeronestop.org/toolkit/training...,JMA Architecture Studios Scholarship,Public Education Foundation,"To provide financial assistance for tuition, f...",Bachelor's Degree,Scholarship,"$5,000",January


In [6]:
# Preview the first rows of the detail table read from fullScholarshipInfo.csv.
df_detailScholarships.head()

Unnamed: 0,scholarshipId,ScholarshipName,Organization,Address,Toll Free Number,Phone Number,Emails,Level of Study,Award Type,Purpose,...,Qualifications,Criteria,Duration,To Apply,Deadline,Contact,For more information,Funds,responseCode,Number of Awards
0,9993981,NAWIC Construction Trades Scholarship,National Association of Women in Construction,"327 S Adams St., Fort Worth, TX 76104",800-552-3506,(817)877-5551,nawic@nawic.org,Bachelor's Degree Vocational,Scholarship,To support women pursuing construction-related...,...,Applicants must be currently enrolled in a con...,Application will be reviewed and selected by t...,Annual.,Applicants must submit the following: complete...,February 28.,Email: nfsf@nawic.org; URL: www.nawic.org/nfsf...,http://www.nawic.org,,,
1,9990581,PC Construction Scholarship,Vermont Student Assistance Corporation,"10 E Allen St., Winooski, VT 05404",,(800)642-3177,info@vsac.org,Bachelor's Degree,Scholarship,To support a Vermont graduating senior pursuin...,...,Applicant must be a Vermont resident; current ...,"Selection is based on financial need, essay, a...",Annual.,Applicant must submit a completed application ...,February 11.,Email: scholarships@vsac.org; URL: www.vsac.or...,http://www.vsac.org,"$2,500",,1.0
2,9993165,Pence Construction Diversity in Leadership Sch...,Oregon Community Foundation - Portland,"1221 SW Yamhill St., Ste. 100, Portland, OR",,(503)227-6846,info@oregoncf.org,Bachelor's Degree Vocational,Scholarship,To support underrepresented students of color ...,...,Applicants must be underrepresented students o...,Preference is given to Multnomah County reside...,Annual.,Application is available online.,March 1.,Email: scholarships@oregoncf.org; URL: oregonc...,http://oregoncf.org/,,,
3,9997696,Carpenters' Company Scholarship Program,Carpenters' Company of the City and County of ...,"320 Chestnut St., Carpenters Hall, Philadelphi...",,(215)925-0167,carphall@carpentershall.com,Bachelor's Degree,Scholarship,"To provide architecture, structural engineerin...",...,,,Annual; up to 4 years.,Applicants must submit a completed scholarship...,,,http://www.carpentershall.org,"$5,000",,
4,9992419,JMA Architecture Studios Scholarship,Public Education Foundation,"4350 S Maryland Pky., Las Vegas, NV 89119",,(702)799-1042,info@thepef.org,Bachelor's Degree,Scholarship,"To provide financial assistance for tuition, f...",...,Applicants must be CCSD seniors interested in ...,Preference will be given to students who have ...,Annual.,Applicants should submit a completed applicati...,January 31.,Phone: 702-221-7422; Email: csdonnelly@ccpef.org.,http://thepef.org,"$5,000",,3.0


In [7]:
# (rows, columns) of the detail table.
df_detailScholarships.shape

(798, 21)

In [8]:
# (rows, columns) of the summary table, for comparison with the detail table.
df_summaryScholarships.shape

(798, 9)

In [9]:
# Join the summary and detail tables on the scholarship identifier. The key is
# spelled differently in the two tables ('scholarshipID' vs 'scholarshipId'),
# so both spellings end up as columns in the result. Any other column name
# shared by both tables is disambiguated with a '_left' / '_right' suffix.
#
# how='inner' is pandas' default, spelled out here: rows whose ID has no match
# in the other table are dropped. validate='one_to_one' makes pandas raise
# MergeError if an ID is duplicated in either table, rather than silently
# multiplying rows in the merged output.
df_allScholarships = df_summaryScholarships.merge(
    df_detailScholarships,
    how='inner',
    left_on='scholarshipID',
    right_on='scholarshipId',
    suffixes=('_left', '_right'),
    validate='one_to_one',
)

In [10]:
# (rows, columns) after the merge. Compare the row count with the two input
# tables to see whether the join dropped or duplicated any rows.
df_allScholarships.shape

(798, 30)

In [11]:
# Preview the merged table. Columns present in both inputs carry the
# '_left' (summary) or '_right' (detail) suffix chosen in the merge call.
df_allScholarships.head()

Unnamed: 0,scholarshipID,scholarshipDetailURL,Award Name,Organization_left,Purposes,Level Of Study,Award Type_left,Award Amount,Deadline_left,scholarshipId,...,Qualifications,Criteria,Duration,To Apply,Deadline_right,Contact,For more information,Funds,responseCode,Number of Awards
0,9993981,https://www.careeronestop.org/toolkit/training...,NAWIC Construction Trades Scholarship,National Association of Women in Construction,To support women pursuing construction-related...,Bachelor's Degree Vocational,Scholarship,,February,9993981,...,Applicants must be currently enrolled in a con...,Application will be reviewed and selected by t...,Annual.,Applicants must submit the following: complete...,February 28.,Email: nfsf@nawic.org; URL: www.nawic.org/nfsf...,http://www.nawic.org,,,
1,9990581,https://www.careeronestop.org/toolkit/training...,PC Construction Scholarship,Vermont Student Assistance Corporation,To support a Vermont graduating senior pursuin...,Bachelor's Degree,Scholarship,"$2,500",February,9990581,...,Applicant must be a Vermont resident; current ...,"Selection is based on financial need, essay, a...",Annual.,Applicant must submit a completed application ...,February 11.,Email: scholarships@vsac.org; URL: www.vsac.or...,http://www.vsac.org,"$2,500",,1.0
2,9993165,https://www.careeronestop.org/toolkit/training...,Pence Construction Diversity in Leadership Sch...,Oregon Community Foundation - Portland,To support underrepresented students of color ...,Bachelor's Degree Vocational,Scholarship,,March,9993165,...,Applicants must be underrepresented students o...,Preference is given to Multnomah County reside...,Annual.,Application is available online.,March 1.,Email: scholarships@oregoncf.org; URL: oregonc...,http://oregoncf.org/,,,
3,9997696,https://www.careeronestop.org/toolkit/training...,Carpenters' Company Scholarship Program,Carpenters' Company of the City and County of ...,"To provide architecture, structural engineerin...",Bachelor's Degree,Scholarship,"$5,000",,9997696,...,,,Annual; up to 4 years.,Applicants must submit a completed scholarship...,,,http://www.carpentershall.org,"$5,000",,
4,9992419,https://www.careeronestop.org/toolkit/training...,JMA Architecture Studios Scholarship,Public Education Foundation,"To provide financial assistance for tuition, f...",Bachelor's Degree,Scholarship,"$5,000",January,9992419,...,Applicants must be CCSD seniors interested in ...,Preference will be given to students who have ...,Annual.,Applicants should submit a completed applicati...,January 31.,Phone: 702-221-7422; Email: csdonnelly@ccpef.org.,http://thepef.org,"$5,000",,3.0


In [12]:
# Persist the merged table. index=False keeps the auto-generated row numbers
# out of the file; by default pandas writes them as an unnamed first column,
# which comes back as a stray 'Unnamed: 0' column when the CSV is re-read.
# NOTE(review): if a downstream reader relies on that leading column
# (e.g. read_csv(..., index_col=0)), update it accordingly.
df_allScholarships.to_csv(outfile.as_posix(), index=False)