In [None]:
#Required installations for running these functions
#Mounts Google Drive at /content/drive so files can be referenced by path (google.colab is only importable on Colab)
from google.colab import drive
drive.mount('/content/drive')
#Installs geopandas, pygeos and rtree, which the import cell below loads
!pip install geopandas
#NOTE(review): libspatialindex-dev is presumably the native library that rtree needs - confirm it is still required
!apt install libspatialindex-dev
!pip install pygeos
!pip install rtree


In [None]:
#Imports of installed programs and their call id in the functions
import geopandas as gpd
import glob as glob
import pygeos as py
import rtree as rtree
import matplotlib
import matplotlib.pyplot as plt

In [None]:
#CSV reader
def csvReader(path):
  """Return the path of the first file that matches a glob pattern.

  Note that this function only locates the file; it does not load it.
  Pass the returned path to a reader to obtain a dataframe.

  Parameters
  ----------
  path : str
      A file path or glob pattern, e.g. '/content/drive/MyDrive/census*.csv'.

  Returns
  -------
  str
      The first matching file name (matches are sorted so the choice is
      the same on every run).

  Raises
  ------
  FileNotFoundError
      If nothing matches `path`.
  """
  # Use the argument itself; the quoted literal 'path' only ever matched a file literally named "path"
  geoms_filenames = sorted(glob.glob(path))
  if not geoms_filenames:
    raise FileNotFoundError("No file matches the pattern: " + str(path))
  gdf_output = geoms_filenames[0]
  return gdf_output

#import censusFinal, census2, and census3 for testing functions below

In [None]:
#Function 1: 
#import geodata frame from a json or geojson file
#choose how to display the geodata frame once it has been imported
#path is where the information will be coming from, for example on google colab copy the path from the file in your sidemenu
#example: '/content/drive/MyDrive/sidewalks.geojson'
#dataview=0, select either 'plot' or 'gdf' in place of dataview=0 with single quotations around the string; leave it as 0 to display nothing
#plot displays the geodata frame as a map, showing the geometry of the dataframe
#gdf displays the geodata frame's first 5 rows
#This selection is meant as a check to make sure that the geodata frame is imported properly

def json_to_gdf (path, dataview=0):
  """Read a json/geojson file into a geodata frame and optionally preview it.

  Parameters
  ----------
  path : str
      File path or glob pattern; the first match (in sorted order) is read.
  dataview : str or int, optional
      'plot' draws the geometry as a map, 'gdf' prints the first 5 rows.
      Any other value (default 0) shows nothing.

  Returns
  -------
  The geodata frame returned by gpd.read_file.

  Raises
  ------
  FileNotFoundError
      If nothing matches `path`.
  """
  geoms_filenames = sorted(glob.glob(path))
  if not geoms_filenames:
    raise FileNotFoundError("No file matches the pattern: " + str(path))
  gdf_output = gpd.read_file(geoms_filenames[0])
  if dataview == 'plot':
    gdf_output.plot()
  elif dataview == 'gdf':
    # A bare .head() inside a function is thrown away, so print it to make the preview visible
    print(gdf_output.head())
  return gdf_output

In [None]:
#Function 2: 
#Function for seeing all or chosen information about the dataframe/dataset that is being imported

#Select a number to choose which type of information is returned
#1 returns all information, column names, projection information, calculations based on column values
#2 returns the mean, maximum, minimum, mean absolute deviation, and standard deviation
#3 returns the number of columns, the column labels, each column's datatype, the amount of memory the dataframe takes, range index, and the number of cells 
#that have non-null values for each column
#4 returns a select portion of the dataframe, the range must be selected for this function, range should look like example: [dataframe['column'] == value] 
#put 0 if there is no range required


def informationDisplay (dataframe, infoWanted, range=0):
  """Print a chosen summary of a dataframe.

  Parameters
  ----------
  dataframe : DataFrame or GeoDataFrame
      The data to describe.
  infoWanted : int
      0 does nothing.
      1 prints the column names, the crs (None when the frame has no crs
        attribute) and dataframe.describe().
      2 prints mean, max, min, standard deviation and mean absolute
        deviation of the selection given by `range`.
      3 calls dataframe.info().
      4 prints dataframe.loc[range].
  range : optional
      Selection used by options 2 and 4, e.g. a column name or a boolean
      mask such as dataframe['column'] == value. 0 (the default) or None
      means "no range": option 2 then summarises the whole dataframe.

  Returns
  -------
  None
  """
  if infoWanted == 0:
    # Simply return; exit() would shut down the notebook kernel
    return
  if infoWanted == 1:
    nameColumn = dataframe.columns
    projection = getattr(dataframe, 'crs', None)
    description = dataframe.describe()
    print (nameColumn, projection, description)
  if infoWanted == 2:
    # 0 is the documented "no range" marker, unless the frame really has a column labelled 0
    noRange = range is None or (
      isinstance(range, int) and not isinstance(range, bool)
      and range == 0 and 0 not in dataframe.columns
    )
    selection = dataframe if noRange else dataframe[range]
    if hasattr(selection, 'select_dtypes'):
      # Keep numeric columns only so geometry/text columns do not break the statistics
      selection = selection.select_dtypes(include='number')
    selectionMean = selection.mean()
    # .mad() was removed in pandas 2.0, so the mean absolute deviation is computed directly
    meanAbsolute = (selection - selectionMean).abs().mean()
    print(
    "mean: ", selectionMean
    ,"max: ", selection.max()
    ,"min: ", selection.min()
    ,"std:", selection.std()
    ,"mean absolute:", meanAbsolute
    )
  if infoWanted == 3:
    dataframe.info()
  if infoWanted == 4:
    rangeWanted = dataframe.loc[range]
    print(rangeWanted)
    

In [None]:
#Function 3: Plot geodata frames from a list on one figure
#First a list must be created
#To create a list declare a variable as follows: new_list =[a,b,c,...]
#Pass the list as the argument to the function
#The function will iterate through the list and return a mapped plot of all the layers in the list
#Choose a path to save the image for the variable path
#Works best with vector data for large scale maps

#data from data folder after being imported in
#NOTE(review): tactilepavingWestmount, sidewalksWestmount and drinkingFount are not defined in the cells shown here;
#they must already exist (presumably loaded with json_to_gdf - confirm) or this line raises NameError on a fresh kernel
ListofLayers = [tactilepavingWestmount,sidewalksWestmount,drinkingFount]

def layeredMap (dataList,path):
  """Draw every layer in dataList on one 14x10 figure and save it.

  An 'N' label with an arrow is annotated in axes-fraction coordinates
  before the layers are drawn. Each element of dataList is plotted with
  its own .plot(ax=...) call, in list order, and the current figure is
  then written to `path` at 300 dpi.

  Returns an empty tuple.
  """
  figure, axes = plt.subplots(figsize=(14,10))

  # North arrow: label position and arrow length are fractions of the axes
  arrow_x, arrow_y, arrow_len = 0.1, 0.9, 0.1
  arrow_style = dict(facecolor='black', width=2, headwidth=20)
  axes.annotate(
    'N',
    xy=(arrow_x, arrow_y),
    xytext=(arrow_x, arrow_y - arrow_len),
    arrowprops=arrow_style,
    ha='center',
    va='center',
    fontsize=20,
    xycoords=axes.transAxes,
  )

  for layer in dataList:
    layer.plot(ax=axes)

  plt.savefig(path, dpi=300)
  return ()
    

In [None]:
#Function 4: Clean Up Dataframes
#Choice of different techniques to clean up the data
#rename will rename the column titles with the input that you give through a dictionary, variable is columnsDict
#convert will convert a column to a different datatype, i.e. convert from float to integer
#user is prompted with a choice to convert a column after inspecting the column
#fillna will fill in a value given to it by x for the chosen column, column and values have to be given by a list
#list should have column names as: 'Column' in the list
#drop will drop a row based on the index number given to it by x

def DataCleanUp (dataframe,columnsDict,dataList,x,cleanupType='type'):
  """Apply one clean-up technique to a dataframe.

  Parameters
  ----------
  dataframe : DataFrame or GeoDataFrame
      The data to clean.
  columnsDict : dict
      {old column name: new column name}; used by 'rename'.
  dataList : list
      Column names to fill; used by 'fillna'.
  x :
      Fill value for 'fillna', or the index label(s) to remove for 'drop'.
  cleanupType : str
      'rename'  renames columns in place and returns the same dataframe.
      'convert' prints the dtypes, then asks whether to convert a column
                and for a 'Column':'type' pair; returns the converted copy,
                or the dataframe unchanged if the answer is not 'yes'.
      'fillna'  returns a copy with missing values in the dataList columns
                replaced by x.
      'drop'    returns a copy without the row(s) labelled x.
      Any other value returns None.

  Raises
  ------
  ValueError
      If the 'convert' answer is not of the form 'Column':'type'.
  """
  #replace the column names
  if cleanupType == 'rename':
    # columns= is required; a bare mapping renames index labels instead of column titles
    dataframe.rename(columns=columnsDict,inplace=True)
    return dataframe
  if cleanupType == 'convert':
    # Print the dtypes so the column can be inspected before answering the prompt
    print(dataframe.dtypes)
    accept = input("Do you want to convert a column? type 'yes' to continue or 'no' to end function")
    if accept != 'yes':
      # Hand the data back untouched rather than calling exit(), which stops the kernel
      return dataframe
    columnconvert = input("Please input the column and type like: 'Column':'type'")
    # Split on the last colon and strip the quotes to build a real {column: dtype} mapping
    columnName, separator, typeName = columnconvert.rpartition(':')
    columnName = columnName.strip().strip('\'"')
    typeName = typeName.strip().strip('\'"')
    if not separator or not columnName or not typeName:
      raise ValueError("Expected input like 'Column':'type', got: " + columnconvert)
    return dataframe.astype({columnName: typeName})
  if cleanupType == 'fillna':
    # One entry per chosen column, all filled with the same value x
    to_fill = {chosencolumn: x for chosencolumn in dataList}
    cleaneddataframe = dataframe.fillna(to_fill)
    return cleaneddataframe
  if cleanupType == 'drop':
    droppedData = dataframe.drop(x)
    return droppedData
  return None
    


In [None]:
#Function 5: Analyze Data
#In this function we will take census data with numerical values 
#Data can be analyzed in different manners
#change will calculate the change from one year to another based on column values
#aggregate will aggregate the data based on the column, Column, and aggregate function, method
#methods to choose are sum, first, last, min, max, mean, median
#aggregate2 will aggregate the data based on a column, i.e. it can aggregate small dissemination areas into larger census areas

def AnalyzeData(dataframe, Column, Column2, method, ColumnList, analysis='change'):
  """Analyze numerical (census) data in one of three ways.

  Parameters
  ----------
  dataframe : DataFrame or GeoDataFrame
      Source data. Not used by 'change' when Column/Column2 are passed as
      values rather than column names.
  Column, Column2 :
      'change'     the later and earlier values, either as column names in
                   `dataframe` or as the value Series themselves.
      'aggregate'  Column is the column to dissolve by.
      'aggregate2' Column is the column to dissolve by and Column2 the
                   other column to keep (presumably the geometry column -
                   verify against the caller).
  method : str
      Aggregate function for 'aggregate' (sum, first, last, min, max,
      mean, median).
  ColumnList :
      Currently unused; kept so existing calls keep working.
  analysis : str
      'change', 'aggregate' or 'aggregate2'. Any other value returns None.

  Returns
  -------
  'change'      tuple (Column - Column2, percentage change relative to
                Column2). Zero values in Column2 follow pandas division
                rules (inf/NaN).
  'aggregate'   the dissolved dataframe.
  'aggregate2'  the two selected columns dissolved by Column.
  """
  #Calculate change
  if analysis == 'change':
    laterValues = dataframe[Column] if isinstance(Column, str) else Column
    earlierValues = dataframe[Column2] if isinstance(Column2, str) else Column2
    dataframenew = laterValues - earlierValues
    # Percentage change is the difference relative to the earlier value, not the plain ratio
    percentageChange = dataframenew / earlierValues * 100
    return (dataframenew, percentageChange)
  if analysis == 'aggregate':
    #aggregate data based on method
    dataframedissolve = dataframe.dissolve(by=Column,aggfunc=method)
    return dataframedissolve
  if analysis == 'aggregate2':
    #Aggregate data by column
    boundary = dataframe[[Column,Column2]]
    regions = boundary.dissolve(by=Column)
    return regions
  return None


In [None]:
#Function 6: Calculations
#percent will calculate the percentage of the dividend between two columns given to it by the arguments Column1 and Column2
#average will calculate the average of the column and create a new column with the average on each row
#subtract will find the difference between the two column values based on their index
#each if will return the dataframe with the new column 

def Calculations (dataframe, Column1, Column2, math='type'):
  """Add one calculated column to a copy of the dataframe.

  Parameters
  ----------
  dataframe : DataFrame or GeoDataFrame
      Source data; it is not modified.
  Column1, Column2 : str
      Column names used in the calculation. Column2 is ignored by
      'average'.
  math : str
      'percent'  adds "percentage" = Column1 / Column2 * 100.
      'average'  adds "average" = the mean of Column1, repeated on every row.
      'subtract' adds "difference" = Column1 - Column2, row by row.
      Any other value returns None.

  Returns
  -------
  A copy of `dataframe` with the new column, or None for an unknown `math`.
  """
  if math not in ('percent', 'average', 'subtract'):
    return None
  # Work on a copy so the input frame keeps its original columns
  dataframeout = dataframe.copy()
  if math == 'percent':
    dataframeout["percentage"] = (dataframe[Column1]/dataframe[Column2])*100
  elif math == 'average':
    # The average is taken over the rows of the column, not divided by the number of columns
    dataframeout["average"] = dataframe[Column1].mean()
  else:
    dataframeout["difference"] = dataframe[Column1]-dataframe[Column2]
  return dataframeout

In [None]:
#Function 7: Reclassification of dataframes
#A two part function
#First reclassify the rows based on a desired value
#enter the name of the dataframe for the variable dataframe
#enter the column to be analyzed for the variable column
#enter the desired value for the variable desiredValue, this can appear as ex. 'none', or 1 
#enter a name for the column you wish to create (variable desirabilityType) so that you do not forget what the desirability value is for
#Reclassification function will give either a 1 or 0 value based on the value found in the row of the column
#it will return a dataframe with a new column
#function will have to be rerun for all columns desired to be reclassified

#For the second function sumReclassification this will sum the total columns that have a desirability value into a new column
#this new column can be used to map the dataframe based on desirability
#rows that have a higher number will have a higher desirability 
#A list of columns to be summed must be given to this function as the variable ColumnList
#This function also requires a dataframe input
#It will return the dataframe with the new column of desirabilitysum 

def Reclassification(dataframe, column, desiredValue,desirabilityType):
  """Flag each row as desirable (1) or not (0) based on one column.

  Parameters
  ----------
  dataframe : DataFrame or GeoDataFrame
      Data to reclassify; the new column is added to this object.
  column : str
      Name of the column to test.
  desiredValue :
      Value that counts as desirable, e.g. 'none' or 1.
  desirabilityType : str
      Name of the new 0/1 column.

  Returns
  -------
  The same dataframe, with `desirabilityType` set to 1 on rows where
  `column` equals `desiredValue` and 0 on all other rows.
  """
  # Compare row by row; a single if/else would give every row the same value
  isDesired = dataframe[column] == desiredValue
  dataframe[desirabilityType] = isDesired.astype(int)
  return dataframe

def sumReclassification(dataframe,ColumnList):
  """Add a 'desirabilitysum' column holding the row-wise total of ColumnList.

  The column is written onto the dataframe that was passed in, and that
  same dataframe is returned.
  """
  chosen_columns = dataframe[ColumnList]
  row_totals = chosen_columns.sum(axis='columns')
  dataframe['desirabilitysum'] = row_totals
  return dataframe

In [None]:
#Function 8: Check and Change CRS
#List should have the list of geodata frames to check and make sure that they are all the same 
#You will be prompted to select whether you want to reproject or not; the input should be either 'yes' or 'no'
#If 'yes' is selected, then will be prompted to select the projection frame that is desired
#Input of this should be 'EPSG:2950' as an example where 2950 is the projection value desired
#dataframes that need to be checked or reprojected must be given in a list through the variable List

#NOTE(review): this repeats the ListofLayers assignment made in the Function 3 cell, and the three layer
#variables are not defined in the cells shown here - they must exist before this line runs
ListofLayers = [tactilepavingWestmount,sidewalksWestmount,drinkingFount]

def projectionValidation (List):
  """Print the CRS of every layer and optionally reproject all of them.

  Parameters
  ----------
  List : list
      Geodata frames to check. Each one must provide .crs and .to_crs().

  Prompts
  -------
  "Do you want to reproject the dataframes?" - answer 'yes' or 'no'.
  "Input desired projection" - e.g. 'EPSG:2950' (only asked after 'yes').

  Returns
  -------
  list
      After 'yes': a new list with every layer reprojected, in the same
      order as `List`. After any other answer: a list of the original
      layers, unchanged.
  """
  for dataFrames in List:
    projection = dataFrames.crs
    print(projection)
  accept = input("Do you want to reproject the dataframes?")
  if accept != 'yes':
    # Return the layers as they are; exit() would shut down the notebook kernel
    return list(List)
  projectionWanted = input("Input desired projection")
  # Reproject every layer; returning from inside the loop only handled the first one
  return [dataFrames.to_crs(projectionWanted) for dataFrames in List]

In [None]:
#Function 9: Create a Scaled Map
#Input the final dataframe that you wish to map with the column to be mapped
#Arguments are dataframe which is the dataframe you wish to use, Column name as 'Column', max is the maximum limit 
#of scale bar, min is the minimum limit of the scale bar, max and min need to be integer values, and path is the 
#location for the function to save the image of the map that you will create
#Included in the map are the axes limits on the figure, and a north arrow that is in a fixed location in the top left corner


def ScaledMap (dataframe, Column, max, min, path):
  """Plot one column of a geodata frame as a scaled map and save the image.

  Parameters
  ----------
  dataframe : GeoDataFrame
      The data to map.
  Column : str
      Name of the column whose values colour the map.
  max, min : number
      Upper and lower limits of the colour scale (passed as vmax/vmin).
  path : str
      Where the image is saved (300 dpi).

  The map uses the "Blues" colour map with a legend, thin black outlines
  and an 'N' arrow in the top left corner, placed like the one in
  layeredMap.

  Returns
  -------
  None
  """
  ax = dataframe.plot(
    # Pass the argument itself; the quoted literal 'Column' looked for a column actually named "Column"
    column=Column
    ,figsize=(14,10)
    ,edgecolor="#000"
    ,linewidth=0.3
    ,vmax=max
    ,vmin=min
    ,legend=True
    ,cmap="Blues"
    )
  # North arrow position and length as fractions of the axes (same values as layeredMap)
  x, y, arrow_length = 0.1, 0.9, 0.1
  ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
            arrowprops=dict(facecolor='black', width=2, headwidth=20),
            ha='center', va='center', fontsize=20,
            xycoords=ax.transAxes)
  # Save the figure that owns this map rather than whichever figure happens to be current
  ax.figure.savefig(path, dpi=300)

In [None]:
#Function 10: Create a Graph of the data 
#Choice of scatter plot, histogram, bar chart or pie chart
#to select a type of graph choose the type from 'scatter' for scatter plot, 
#'hist' for histogram, 'bar' for bar chart, or '0' for pie chart
#select the x and y values to be graphed, note hist and pie do not have a y value and none should be input
#to add a title the title argument should appear as 'TITLE OF GRAPH' when calling the function 

def graphFunction(x,title,type='scatter',**y):
  """Draw a scatter plot, histogram, bar chart or pie chart of the data.

  Parameters
  ----------
  x :
      Values for the x axis (or the values to bin / slice for hist and pie).
  title : str
      Title shown above the graph.
  type : str
      'scatter', 'hist' or 'bar'; any other value (e.g. '0') draws a pie
      chart.
  **y :
      The y values, passed by keyword as y=values. If a single keyword with
      another name is given, its value is used as the y values. Required
      for 'scatter' and 'bar', ignored for 'hist' and pie.

  Returns
  -------
  The result of plt.show().

  Raises
  ------
  ValueError
      If type is 'scatter' or 'bar' and no y values were given.
  """
  # **y arrives as a dict of keywords, so pull the actual values out before plotting
  yValues = y.get('y')
  if yValues is None and len(y) == 1:
    yValues = next(iter(y.values()))
  if type in ('scatter', 'bar') and yValues is None:
    raise ValueError("y values are required for a '" + type + "' graph, pass them as y=values")

  if type=='scatter':
    plt.scatter(x,yValues)
  elif type=='hist':
    plt.hist(x)
  elif type=='bar':
    plt.bar(x,yValues)
  else:
    plt.pie(x)
  # Use the caller's title; the quoted literal "title" labelled every graph with the word title
  plt.title(title)
  return plt.show()