In [5]:
import csv
import re

from geojson import Point, Feature, FeatureCollection, dump, MultiPoint

## Process the records into GeoJSON

In [6]:
# Load every row of the master spreadsheet export as a dict keyed by column header.
# 'utf-8-sig' strips a leading byte-order mark if one is present, so the first
# header name comes through clean. newline='' hands line-ending handling to the
# csv module, which its documentation requires for files passed to a reader.
with open('DATA MASTER-dh-copy-v8-2024-07-17.csv', encoding='utf-8-sig', newline='') as file:
    records = list(csv.DictReader(file))
# No explicit close(): leaving the with-block closes the file.

In [7]:
# Show the column headers available on each record (iterating a dict yields its keys).
list(records[0])

['Theme (KEF Themes)',
 'Title',
 'Memory Mapper Title (keywords to identify the project) (should include the research content',
 'Project Location - In London',
 'Date',
 'Link',
 'Identification- DOI/URL/ISBN/Reference no.',
 'Authors',
 'London Authors',
 'Other Author (National+International)',
 'Department',
 'Abstract',
 'Partners',
 'Partnership Location - In London',
 'Partnership',
 'Policy Sources////Citations ( from Overton)',
 'Policy source subcategory',
 'Partnership 1',
 'Partnership 2',
 'Partnership 3',
 'Partnership 4',
 'Filter 2 (Themes)',
 'Funder 1',
 'Funder 2',
 'Funder 3',
 'Source',
 'Type of Work (phD/masters)',
 'Column',
 'Column2',
 'Column3',
 'Column4',
 'Column5',
 'Column6',
 'Column7']

In [18]:
# Matches a "(lat,lon)" pair of decimal numbers, e.g. "(51.5246,-0.1340)".
# Decimal points are escaped so only a real '.' is accepted, a single-digit
# integer part is allowed, and whitespace around either number is tolerated so
# "(51.5246, -0.1340)" is not silently skipped.
COORD_PATTERN = re.compile(r'\(\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\s*\)')

features = []

for row in records:
    positions = []
    clean = ''
    # The location cell holds one "label (lat,lon)" entry per line.
    for string in row['Project Location - In London'].split('\n'):
        res = COORD_PATTERN.search(string)
        if res is None:
            continue
        # The label is the line with its coordinates removed. NOTE: when a
        # record has several located lines, only the last label is kept as the
        # feature name (unchanged from the previous behaviour).
        clean = COORD_PATTERN.sub('', string)
        lat, lon = float(res.group(1)), float(res.group(2))
        # GeoJSON positions are ordered (longitude, latitude).
        positions.append((lon, lat))

    # MultiPoint takes a list of positions; rows without any parsable
    # coordinates still produce a feature, with an empty coordinate list.
    geom = MultiPoint(positions)

    feature = Feature(geometry=geom, properties={
            'name': clean.strip(),
            # Keep only the part of the title before the first colon.
            'full_title': row['Title'].split(':')[0],
            'short_title': row['Memory Mapper Title (keywords to identify the project) (should include the research content'],
            'kef_theme': row['Theme (KEF Themes)'],
            'subject_area': row['Filter 2 (Themes)'],
            'department': row['Department'],
            'abstract': row['Abstract'],
            'partners': row['Partners'],
            'primary_funder': row['Funder 1'],
            'primary_partnership_type': row['Partnership 1'],
            'data_source': row['Source'],
            'authors': row['Authors']
        },
    )
    features.append(feature)

In [19]:
# Wrap all per-record features in a single GeoJSON FeatureCollection.
collection = FeatureCollection(features)

In [20]:
# Serialise the FeatureCollection to disk as GeoJSON.
# The encoding is stated explicitly so the output does not depend on the
# platform default; the with-block closes the file, so no close() is needed.
with open('./data-master-research-locations-v8-2024-07-17.geojson', 'w', encoding='utf-8') as file:
    dump(collection, file)

## Create a department–funder network graph

In [21]:
import networkx as nx

In [28]:
# Columns whose non-empty values become nodes linked to the row's department,
# keyed by the node_type stored on those nodes. Only funders are linked for
# this export; uncomment a line to add that relationship to the graph as well.
LINKED_COLUMNS = {
    'funder': ['Funder 1', 'Funder 2', 'Funder 3'],
    # 'partnership': ['Partnership 1', 'Partnership 2', 'Partnership 3', 'Partnership 4'],
    # 'research area': ['Filter 2 (Themes)'],
}

g = nx.Graph()

for r in records:
    # `or ''` covers short CSV rows, where DictReader fills missing cells with
    # None; strip() stops whitespace-only cells from becoming nodes.
    department = (r['Department'] or '').strip()
    if not department:
        continue

    # Only add the node on first sight so an existing node_type is not overwritten.
    if department not in g:
        g.add_node(department, node_type='department')

    for node_type, columns in LINKED_COLUMNS.items():
        for column in columns:
            linked = (r[column] or '').strip()
            if not linked:
                continue
            if linked not in g:
                g.add_node(linked, node_type=node_type)
            # nx.Graph keeps a single undirected edge per pair, so repeated
            # department/value pairs do not create duplicates.
            g.add_edge(department, linked)

In [29]:
# Write the graph to disk in GEXF format.
nx.write_gexf(g, 'departments-funders-network-v1-2024-07-11.gexf')

In [None]:
# Same idea, but linking departments to partnership types, exported as source/target/value CSV rows

In [39]:
# Both dicts rely on insertion order, so rows come out in first-seen order,
# exactly as the earlier list-scanning version produced them, but with
# constant-time lookups instead of rescanning the lists for every record.
edge_weights = {}   # (department, partnership type) -> number of records with that pair
point_groups = {}   # (ID, Group) -> None; used as an insertion-ordered set

PARTNERSHIP_COLUMNS = ('Partnership 1', 'Partnership 2', 'Partnership 3', 'Partnership 4')

for r in records:
    # `or ''` covers short CSV rows, where DictReader fills missing cells with
    # None; strip() stops whitespace-only cells from counting as values.
    department = (r['Department'] or '').strip()
    if not department:
        continue

    # Departments are group 'A', partnership types are group 'B'.
    point_groups.setdefault((department, 'A'))

    for column in PARTNERSHIP_COLUMNS:
        p = (r[column] or '').strip()
        if not p:
            continue

        pair = (department, p)
        edge_weights[pair] = edge_weights.get(pair, 0) + 1
        point_groups.setdefault((p, 'B'))

edges = [
    {'source': source, 'target': target, 'value': value}
    for (source, target), value in edge_weights.items()
]
points = [{'ID': node_id, 'Group': group} for node_id, group in point_groups]
            
            

In [40]:
# Write the weighted department -> partnership-type edge list.
# Field names are spelled out so an empty edge list yields a header-only file
# instead of an IndexError on edges[0]. newline='' stops the csv module's own
# line endings being translated again (blank rows on Windows).
with open('department-to-partnership-types-edges-v1-2024-07-11.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['source', 'target', 'value'])
    writer.writeheader()
    writer.writerows(edges)

In [41]:
# Write the node list (one row per distinct ID/Group pair).
# Field names are spelled out so an empty node list yields a header-only file
# instead of an IndexError on points[0]. newline='' stops the csv module's own
# line endings being translated again (blank rows on Windows).
with open('department-to-partnership-types-points-v1-2024-07-11.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['ID', 'Group'])
    writer.writeheader()
    writer.writerows(points)