In [None]:
import json
import pandas as pd
import re


In [None]:
# Import mapstreet data.
# The file is read as pipe-delimited text with header=None, so the column
# names are supplied explicitly here.
# A forward slash is used in the path: a backslash before "G" is an invalid
# string escape (a SyntaxWarning on recent Python versions) and only resolves
# as a directory separator on Windows.
mapstreet = pd.read_csv(
    "src_data/GISMAPSTREET.txt",
    sep="|",
    header=None,
    skip_blank_lines=True,
    names=[
        "MINORGRIDNAME",
        "STREETNAME",
        "ROADDIRECTIONPREFIXCD",
        "ROADNAME",
        "ROADTYPESUFFIXCD",
        "ROADDIRECTIONSUFFIXCD",
        "ZIPCD",
        "RIGHTZIPCD",
        "LOWADDRESSRANGEVALUE",
        "HIGHADDRESSRANGEVALUE",
        "TOWNSHIPNAME",
        "MUNICIPALITYNAME",
        "NUMCENTERLINES",
        "INTERSECTINGLENGTH",
        "GEOM_WKT",
    ],
)

# Show the full column list as loaded, before anything is removed.
print(mapstreet.columns)

# Drop the columns that are not used further down. Assigning the result
# (instead of inplace=True) keeps the cell easy to follow and re-run.
mapstreet = mapstreet.drop(
    columns=[
        "ROADDIRECTIONPREFIXCD",
        "ROADNAME",
        "ROADTYPESUFFIXCD",
        "ROADDIRECTIONSUFFIXCD",
        "RIGHTZIPCD",
        "LOWADDRESSRANGEVALUE",
        "HIGHADDRESSRANGEVALUE",
        "TOWNSHIPNAME",
    ]
)

print(mapstreet)
mapstreet.dtypes


In [None]:
# Only rows that carry WKT coordinates can be mapped, so keep just the rows
# whose GEOM_WKT value is present.
has_wkt = mapstreet['GEOM_WKT'].notna()
mapstreet = mapstreet[has_wkt]
print(mapstreet)

# Save the filtered rows (header row included, index column omitted).
mapstreet.to_csv(r'./test_data/mapstreet-test.csv', header=True, index=None)

In [None]:
# Load the street-consequence sheet. It is read with header=None and explicit
# column names.
# NOTE(review): if the CSV's first line is a header row it will be loaded as
# an ordinary data row here — confirm against the file.
# A forward slash is used in the path: a backslash before "S" is an invalid
# string escape (a SyntaxWarning on recent Python versions) and only resolves
# as a directory separator on Windows.
consequence = pd.read_csv(
    "src_data/Street Consequence - Sheet1.csv",
    sep=",",
    header=None,
    skip_blank_lines=True,
    names=["STATE", "MUNICIPALITY", "STREETNAME", "MINORGRIDNAME", "CONSEQUENCE"],
)

# Discard every row that has a missing value in any column.
consequence = consequence.dropna()
print(consequence)
# consequence.dtypes

In [None]:
# Join the consequence rows to the map-street rows on the street name.
# Both inputs have a MINORGRIDNAME column, so the merge suffixes them as
# MINORGRIDNAME_x (from consequence) and MINORGRIDNAME_y (from mapstreet);
# the consequence-side column gets its plain name back.
# NOTE(review): the join key is STREETNAME only, so a street name is matched
# regardless of its grid — confirm that this is intended.
merged_df = (
    consequence
    .merge(mapstreet, on="STREETNAME")
    .rename(columns={"MINORGRIDNAME_x" : "MINORGRIDNAME"})
)

print(merged_df)
merged_df.dtypes

In [None]:
# Write the full merged frame to disk (header row included, no index column).
combined_csv_path = r'./test_data/combined_test.csv'
merged_df.to_csv(combined_csv_path, header=True, index=None)

In [None]:
# Reduce the merged frame to the columns needed for the map output:
# STREETNAME, MINORGRIDNAME, CONSEQUENCE and GEOM_WKT.
merged_small = merged_df.drop(
    columns=[
        "STATE",
        "MUNICIPALITY",
        "MINORGRIDNAME_y",
        "ZIPCD",
        "MUNICIPALITYNAME",
        "NUMCENTERLINES",
        "INTERSECTINGLENGTH",
    ]
)

# Remove the row labelled 0.
# NOTE(review): presumably this discards a header line that was loaded as
# data (both inputs are read with header=None) — confirm, otherwise a real
# record is being dropped here.
merged_small = merged_small.drop(index=0)

# A feature needs both a geometry and a consequence value, so drop a row when
# EITHER is missing (how='all' would only drop rows missing both).
merged_small = merged_small.dropna(subset=['GEOM_WKT', "CONSEQUENCE"], how='any')
merged_small = merged_small.drop_duplicates()

print(merged_small)
merged_small.to_csv(r'./test_data/combined_test.csv', header=True, index=None)

In [None]:
# Print the largest and then the smallest CONSEQUENCE value.
# NOTE(review): if the column holds text rather than numbers these are string
# comparisons — check the dtypes shown below.
consequence_values = merged_small['CONSEQUENCE']
for pick_extreme in (max, min):
    print(pick_extreme(consequence_values))

# Keep the last 18 rows of the frame available under their own name.
select_rows = merged_small.tail(n=18)

merged_small.dtypes

In [None]:
def _parse_wkt_lines(wkt):
    """Return the coordinate lines contained in a WKT string.

    Every innermost parenthesised group is treated as one line: the group is
    split on commas into positions and each position is split on whitespace
    into floats.

    Args:
        wkt: WKT text such as "LINESTRING (x y, x y)" or
            "MULTILINESTRING ((x y, x y), (x y, x y))".

    Returns:
        A list of lines, each a list of positions, each a list of floats.
        The list is empty when the text has no parenthesised group.

    Raises:
        ValueError: if a coordinate token cannot be converted to float.
    """
    # Excluding "(" as well as ")" from the group makes the pattern skip the
    # outer parenthesis of a MULTILINESTRING, with or without a space after
    # the keyword, so no text surgery is needed before matching.
    groups = re.findall(r'\(([^()]+)', wkt)
    return [
        [[float(val) for val in position.split()] for position in group.split(",")]
        for group in groups
    ]


def _wkt_to_geometry(wkt):
    """Build a geometry dict from a WKT value, or None if it cannot be mapped.

    Args:
        wkt: the GEOM_WKT cell value; anything that is not a str is ignored.

    Returns:
        {"type": "LineString", "coordinates": [...]} for text starting with
        "LINESTRING", {"type": "MultiLineString", "coordinates": [[...], ...]}
        for text starting with "MULTILINESTRING", and None for non-string
        values, other geometry kinds, or text without any coordinates.
    """
    if not isinstance(wkt, str):
        return None

    if wkt.startswith('LINESTRING'):
        lines = _parse_wkt_lines(wkt)
        # No coordinate group at all (e.g. an empty geometry): skip the row
        # instead of failing on lines[0].
        if not lines:
            return None
        return {"type": "LineString", "coordinates": lines[0]}

    if wkt.startswith('MULTILINESTRING'):
        lines = _parse_wkt_lines(wkt)
        if not lines:
            return None
        return {"type": "MultiLineString", "coordinates": lines}

    return None


# Build one feature dict per row whose GEOM_WKT could be parsed.
features_list = []

for _, row in merged_small.iterrows():
    geometry = _wkt_to_geometry(row['GEOM_WKT'])
    if geometry is None:
        continue

    features_list.append({
        "type": "Feature",
        "geometry": geometry,
        "properties": {
            "STREETNAME": row['STREETNAME'],
            "MINORGRIDNAME": row['MINORGRIDNAME'],
            "CONSEQUENCE": float(row['CONSEQUENCE'])
            }
        })


In [None]:
# print(features_list)

In [None]:
# Wrap the collected features in a FeatureCollection object and write it to
# disk as JSON indented by four spaces.
geometries = {"type": "FeatureCollection", "features": features_list}

with open("test_data/allen-co-pipes.json", "w") as geojson_file:
    json.dump(geometries, geojson_file, indent=4)


In [None]:
# Quick check of the parenthesis-capturing pattern on a sample string: each
# match is the text after an opening parenthesis, up to (not including) the
# next closing one.
test_str = "inside one (inside two), (another two) )"

# Raw string so that the backslash reaches the regex engine as written;
# in a normal string literal "\(" is an invalid escape sequence.
coordinates_lines = re.findall(r'\(([^)]+)', test_str)
print(coordinates_lines)

In [None]:
    # if coord_list_of_list:
    #     # print('we have coordinates')

    #     for long, lat in coord_list_of_list:
    #         # print(long, ', ', lat)

    #         feature =  { 
    #             "type": "Feature",
    #             "geometry": {
    #                 "type": "LineString",
    #                 "coordinates": [
    #                 [102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
    #                 ]
    #                 },
    #             "properties": {
    #                 "prop0": "value0",
    #                 "prop1": 0.0
    #                 }
    #             }

            # for longs,lats in LongLatList
            #     poly = {
            #         'type': 'Polygon',
            #         'coordinates': [[lon,lat] for lon,lat in zip(longs,lats) ]
            #     }
            #     geos.append(poly)

            # geometries = {
            #     'type': 'FeatureCollection',
            #     'features': geos,
            # }

            # geo_str = json.dumps(geometries)