# Department of Defense SBIR Awards

In [47]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import urllib.parse

from mdutils.mdutils import MdUtils
from tqdm.notebook import tqdm

# Load Data

In [51]:
# Load the awards export; every later cell works off this frame.
df = pd.read_csv('sbir_dod.csv')

# Blank ML / DA / AI entries become 0.
for flag_col in ('ML', 'DA', 'AI'):
    df[flag_col] = df[flag_col].fillna(0)

# 'Award Amount' arrives as text such as "$1,234,567": strip the dollar signs
# and thousands separators, then parse it as a number. The pattern is a raw
# string so the regex is not at the mercy of Python's escape handling.
df['Award Amount'] = pd.to_numeric(
    df['Award Amount'].str.replace(r'[$,]', '', regex=True)
)

# Missing text fields become empty strings so later string operations and
# report generation never see NaN in them.
for text_col in ('Branch', 'Research Keywords', 'Abstract'):
    df[text_col] = df[text_col].fillna('')

# Replace every literal '?' in the title with a space (plain-text match, no
# regex needed).
df['Award Title'] = df['Award Title'].str.replace('?', ' ', regex=False)

# AF, DARPA, MDA

In [44]:
# Display the column labels of the loaded frame.
df.columns

Index(['Sort ID', 'ID', 'Assignment', 'Company', 'Award Title', 'Agency',
       'Branch', 'Phase', 'Program', 'Agency Tracking Number', 'Contract',
       'Proposal Award Date', 'Contract End Date', 'Solicitation Number',
       'Solicitation Year', 'Topic Code', 'Award Year', 'Award Amount', 'DUNS',
       'Hubzone Owned', 'Socially and Economically Disadvantaged',
       'Woman Owned', 'Number Employees', 'Company Website', 'Address1',
       'Address2', 'City', 'State', 'Zip', 'Contact Name', 'Contact Title',
       'Contact Phone', 'Contact Email', 'PI Name', 'PI Title', 'PI Phone',
       'PI Email', 'RI Name', 'RI POC Name', 'RI POC Phone',
       'Research Keywords', 'Abstract', 'ML', 'DA', 'AI', 'Irrelevant'],
      dtype='object')

In [52]:
# Work on a copy so the cleaned source frame `df` stays untouched.
df_md = df.copy()

# Change Assignment: swap the two full names for initials. The value is used
# below as the folder segment of each report's URL.
df_md['Assignment'] = df_md['Assignment'].replace({'Christian': 'CC', 'Joel': 'JH'})

# Create filename: "[<zero-padded ID>] <Award Title>", e.g. "[007] Some Title".
df_md['filename'] = '[' + df_md['ID'].map('{:03d}'.format) + '] ' + df_md['Award Title']

# Rewrite characters that are awkward in file names. The first two are literal
# replacements (regex=False makes that explicit regardless of the pandas
# version's default); the last drops a single trailing full stop.
df_md['filename'] = (
    df_md['filename']
    .str.replace('/', ' or ', regex=False)
    .str.replace(':', ' -', regex=False)
    .str.replace(r'\.$', '', regex=True)
)

# Cap names at 150 characters and mark the truncated ones with an ellipsis.
# The flag is computed before slicing, otherwise nothing would exceed the cap.
df_md['filename_exceed'] = df_md['filename'].str.len() > 150
df_md['filename'] = df_md['filename'].str[:150]
df_md.loc[df_md['filename_exceed'], 'filename'] = (
    df_md.loc[df_md['filename_exceed'], 'filename'] + '...'
)

# Link to each report on GitHub; only the file-name part is percent-encoded.
df_md['url'] = (
    'https://github.com/chrischow/dod_sbir_awards/blob/master/Reports/'
    + df_md['Assignment']
    + '/'
    + (df_md['filename'] + '.md').map(urllib.parse.quote)
)

# Clean keywords: normalise to exactly one space after every comma (collapse
# ", " to "," first so existing spaces are not doubled), then lower-case.
df_md['Research Keywords'] = (
    df_md['Research Keywords']
    .str.replace(', ', ',', regex=False)
    .str.replace(',', ', ', regex=False)
    .str.lower()
)

In [54]:
# Write one Markdown report per award under Reports/<assignment>/.

# Make sure every target folder exists before the first report is written,
# so a fresh checkout does not depend on the folders having been made by hand.
for assignment in df_md['Assignment'].unique():
    os.makedirs(f'Reports/{assignment}', exist_ok=True)

for _, temp_df in tqdm(df_md.iterrows(), total=df_md.shape[0]):

    # Extract information
    filename = temp_df['filename']
    title = temp_df['Award Title']
    abstract = temp_df['Abstract']
    branch = temp_df['Branch']
    award_year = temp_df['Award Year']
    award_amount = temp_df['Award Amount']
    keywords = temp_df['Research Keywords']
    temp_id = temp_df['ID']
    assignment = temp_df['Assignment']

    # Create files
    mdFile = MdUtils(file_name=f'Reports/{assignment}/{filename}', title=title)

    # Abstract
    mdFile.new_header(level=1, title='Abstract')
    mdFile.new_paragraph(abstract)
    mdFile.new_line()

    # Award Details: a 4x2 table passed as one flat list, header row first
    # and the matching values second.
    mdFile.new_header(level=1, title='Award Details')
    temp_tbl = [
        'Branch', 'Award Year', 'Award Amount', 'Keywords',
        branch, str(award_year), '${:,.0f}'.format(award_amount), keywords,
    ]
    mdFile.new_table(columns=4, rows=2, text=temp_tbl, text_align='center')
    mdFile.new_line()

    # Link back to home: the fragment is the award ID, which the README's
    # table uses as the element id of that award's row.
    mdFile.new_line()
    mdFile.new_paragraph(f'[Back to Home](https://github.com/chrischow/dod_sbir_awards#{temp_id})')

    # Create file
    mdFile.create_md_file()

  0%|          | 0/2599 [00:00<?, ?it/s]

## Create README

In [72]:
# Registry of awards: the subset of report columns needed for an index table,
# relabelled ('ID' -> 'id', 'url' -> 'URL', 'Assignment' -> 'ic').
registry = df_md[
    ['ID', 'Award Title', 'Award Year', 'Research Keywords', 'url', 'Assignment']
].rename(columns={'ID': 'id', 'url': 'URL', 'Assignment': 'ic'})

In [80]:
# Build README.md: an About section followed by a table indexing every award.
mdFile = MdUtils(file_name='README', title='Department of Defense SBIR Awards')

# About
mdFile.new_header(level=1, title='About')
mdFile.new_paragraph('This repository contains code to generate reports on awards given to companies for DoD projects through the Small Business Innovation Research platform.')
mdFile.new_paragraph('The data was downloaded from [SBIR](https://www.sbir.gov/sbirsearch/award/all), processed in Python, and exported as reports in Markdown format for easy reading.')
mdFile.new_line()

# Awards: the table is passed as one flat list of cells, six per row, with
# the header row first.
mdFile.new_header(level=1, title='List of Awards')
temp_tbl = ['ID', 'Award Title', 'Award Year', 'Keywords', 'IC', 'URL']

# Select the columns in table order so each row unpacks positionally.
table_columns = ['id', 'Award Title', 'Award Year', 'Research Keywords', 'ic', 'URL']
for award_id, award_title, award_year, keywords, ic, url in registry[table_columns].itertuples(index=False, name=None):
    temp_tbl.extend([
        # The span's id doubles as an in-page anchor (#<id>) for this row.
        f"<span id='{award_id}'>{award_id}</span>",
        award_title,
        # str() works for plain Python and NumPy scalars alike.
        str(award_year),
        keywords,
        ic,
        f'[Link]({url})',
    ])

mdFile.new_table(columns=6, rows=len(registry) + 1, text=temp_tbl, text_align='left')
mdFile.new_line()

# Create file
mdFile.create_md_file()

<mdutils.fileutils.fileutils.MarkDownFile at 0x20ede728668>