In [206]:
import pandas as pd
import numpy as np
import json

from geopy.distance import geodesic

In [269]:
def census_diff(df, census):

    grp = df.groupby(["plot",  census])
    cen = grp.date.agg(['min', 'max'])
    cen["mid"] = (cen["min"] + (cen["max"] - cen["min"])/2).dt.date
    cen["difference"] = cen["mid"].diff().astype('timedelta64[D]')
    cen.loc[cen["difference"] < 0 , "difference"] = np.NAN
    cen["diff_yrs"] = cen.difference/365

    cen.reset_index(level=0, inplace=True)
    cen.reset_index(level=0, inplace=True)

    cen[census].astype(str)
    cen["step"] = cen[census].astype(str).shift() + "-" + cen[census].astype(str)
    cen.loc[cen["difference"].isnull(), "step"] = np.NaN
    cen.index = cen['plot'] + "_" + cen['step']

    return cen

In [207]:
# open plot locations
with open('../Data/rows.geojson') as f:
    data = json.load(f)

properties = pd.DataFrame()

# add each line of geojson file to dataframe
for i in range(0, len(data['features'])):
    properties = properties.append(pd.DataFrame(data['features'][i]['properties'], index = [i]))

# only really care about these columns
properties = properties.loc[: , ["plot_size",
                                 "centroid_y",
                                 "centroid_x",
                                 "fractal_order",
                                 "location"]]

properties.rename(columns={'centroid_y':'longitude', 'centroid_x':'latitude'}, inplace=True)
properties["longlat"] = properties.apply(lambda x: [x.longitude, x.latitude], axis=1)

# seperate dataframe for only second order fractal points
second_order = properties.loc[properties.fractal_order == 2, : ]

# function to find point closest to given point
def closest(pt, others):
    
    clst_pt = min(others.longlat, key = lambda x: geodesic(pt, x).meters)
        
    return others.location.loc[others.longlat.apply(lambda x: x == clst_pt)].reset_index(drop = True)


# fractal nesting and agb data
fpn = pd.read_csv("../Data/Fractal_point_nesting.csv")
agb = pd.read_csv("../Data/AGB.csv")

# specific wanted columns - and rename ***(going with Chave moist)***
agb = agb[["field_name", "Plot", "Date", "AGB_Chave_moist", "ForestQuality"]]
agb.columns = ["field_name", "plot", "date", "agb", "forestquality"]

In [208]:
E  = pd.read_csv("../Data/small_mammals/test/E_test.csv")
F  = pd.read_csv("../Data/small_mammals/test/F_test.csv")
D  = pd.read_csv("../Data/small_mammals/test/D_test.csv")
OG = pd.read_csv("../Data/small_mammals/test/OG_test.csv")

In [209]:
def sort_mams(df):

    ncolnames = ["occasion", "date", "grid", "point", "trap", "trap_id", "species"]

    df.columns  = ncolnames

    # stupid formatting sorted
    df["occasion"]  = df.occasion.str.replace("--", "-")
    df["grid"]      = df.grid.str.replace("--", "-")
    df["trap_id"]   = df.trap_id.str.replace("--", "-")
    df["trap_id"]   = df.trap_id.apply(lambda x: x[:-1])
    df["date"]      = pd.to_datetime(df.date)
    df["year"]      = df.date.dt.year
    df["census"]    = df.occasion.str[-6:]
    df["plot"]      = df.occasion.str[0]
    
    return df

In [210]:
E  = sort_mams(E)
F  = sort_mams(F)
D  = sort_mams(D)
OG = sort_mams(OG)

frames = [E, F, D, OG]

mamls_df = pd.concat(frames, sort = False)

mamls_df["species"] = mamls_df.species.fillna("None")

In [211]:
m_lkup = pd.read_csv("../Data/small_mammals/mammals_lookup.csv")
m_lkup.columns = ["code", "species", "scientific"]

mamls_df["species"] = mamls_df.species.str.strip()

# if its a questionmark - I just go with it
# if its an either or I go with the first one!
mamls_df.loc[mamls_df.loc[:, "species"] == "CTRS-but see notes", "species"] = "CTRS"

mamls_df.loc[mamls_df.loc[:, "species"] == "SS?",          "species"] = "SS"
mamls_df.loc[mamls_df.loc[:, "species"] == "WH?",          "species"] = "WH"
mamls_df.loc[mamls_df.loc[:, "species"] == "PR?",          "species"] = "PR"
mamls_df.loc[mamls_df.loc[:, "species"] == "RR?",          "species"] = "RR"
mamls_df.loc[mamls_df.loc[:, "species"] == "MR?",          "species"] = "MR"
mamls_df.loc[mamls_df.loc[:, "species"] == "MR??",         "species"] = "MR"
mamls_df.loc[mamls_df.loc[:, "species"] == "RS?",          "species"] = "RS"
mamls_df.loc[mamls_df.loc[:, "species"] == "LGTRS?",       "species"] = "LGTRS"
mamls_df.loc[mamls_df.loc[:, "species"] == "BS?",          "species"] = "BS"
mamls_df.loc[mamls_df.loc[:, "species"] == "PSQ",          "species"] = "LSQ"      # not confident on this
mamls_df.loc[mamls_df.loc[:, "species"] == "BSQ",          "species"] = "BSQ?"
mamls_df.loc[mamls_df.loc[:, "species"] == "SSQ",          "species"] = "SSQ?"
mamls_df.loc[mamls_df.loc[:, "species"] == "RS or SS" ,    "species"] = "RS"
mamls_df.loc[mamls_df.loc[:, "species"] == "WH or SS",     "species"] = "WH"
mamls_df.loc[mamls_df.loc[:, "species"] == "BS/RS?",       "species"] = "BS"
mamls_df.loc[mamls_df.loc[:, "species"] == "PTSQ?",        "species"] = "PTSQ"
mamls_df.loc[mamls_df.loc[:, "species"] == "LETRS",        "species"] = "LETRS?"   # for some reason the lookup table has a ?
mamls_df.loc[mamls_df.loc[:, "species"] == "CBS?",         "species"] = "CBS"
mamls_df.loc[mamls_df.loc[:, "species"] == "SL?TRS",       "species"] = "SLTRS"
mamls_df.loc[mamls_df.loc[:, "species"] == "SLTRS?",       "species"] = "SLTRS"
mamls_df.loc[mamls_df.loc[:, "species"] == "L?TRS",        "species"] = "SLTRS"
mamls_df.loc[mamls_df.loc[:, "species"] == "LSQ?",         "species"] = "LSQ"
mamls_df.loc[mamls_df.loc[:, "species"] == "CTRS?",        "species"] = "CTRS"
mamls_df.loc[mamls_df.loc[:, "species"] == "LTRS or CTRS", "species"] = "CTRS"     # went with CTRS as LTRS could refer to a couple
mamls_df.loc[mamls_df.loc[:, "species"] == "LTRS",         "species"] = "LETRS?"   # not convinced about this one
mamls_df.loc[mamls_df.loc[:, "species"] == "Squirrel",     "species"] = "squirrel"
mamls_df.loc[mamls_df.loc[:, "species"] == "DTT_DEAD",     "species"] = "DTT"
mamls_df.loc[mamls_df.loc[:, "species"] == "LSQ?_DEAD",    "species"] = "LSQ"



mamls_df.loc[mamls_df.loc[:, "species"] == "squirrel",     "species"] = "unknown"  # ***mmm?***
mamls_df.loc[mamls_df.loc[:, "species"] == "See notes",    "species"] = "unknown"
mamls_df.loc[mamls_df.loc[:, "species"] == "??",           "species"] = "unknown"
mamls_df.loc[mamls_df.loc[:, "species"] == "?",            "species"] = "unknown"
mamls_df.loc[mamls_df.loc[:, "species"] == "Unknown",      "species"] = "unknown"


mamls_df = pd.merge(mamls_df,
                    m_lkup[["code", "scientific"]],
                    how      = "left",
                    left_on  = "species",
                    right_on = "code")

# gtet rid of the leftovers...
mamls_df = mamls_df.loc[-mamls_df.code.isna(), :]

In [212]:
trap_locs = pd.DataFrame({"trap_id" : mamls_df.trap_id.unique()})

In [213]:
trap_locs = trap_locs.merge(properties[["location", "longlat"]],
                            how      = "left",
                            left_on  = "trap_id",
                            right_on = "location")

In [214]:
trap_locs["second_order"] = trap_locs.longlat.apply(lambda x: closest(x, second_order))

In [215]:
mamls_df = mamls_df.merge(trap_locs, how = "left", on = "trap_id")

In [216]:
mamls_df.second_order = mamls_df.second_order.str[-3:]
mamls_df.second_order = mamls_df.second_order.astype(int)

In [218]:
mamls_df = mamls_df.merge(agb[["plot", "agb", "forestquality"]], how = "left",
                          left_on = "second_order", right_on = "plot")

In [266]:
mamls_df = mamls_df.rename(index=str, columns={"plot_x": "plot"})

mamls_df = mamls_df[["occasion",
                     "date",
                     "grid",
                     "point",
                     "trap",
                     "trap_id",
                     "species",
                     "year",
                     "plot",
                     "census",
                     "scientific",
                     "longlat",
                     "second_order",
                     "agb",
                     "forestquality"]]

mamls_df.to_csv("../Results/mammals_sorted2.csv")

In [224]:
mamls_df["trap_year"]   = mamls_df.trap_id + "_" + mamls_df.year.astype(str)
mamls_df["grid_year"]   = mamls_df.grid    + "_" + mamls_df.year.astype(str)
mamls_df["trap_census"] = mamls_df.trap_id + "_" + mamls_df.census
mamls_df["grid_census"] = mamls_df.grid    + "_" + mamls_df.census

In [241]:
def make_matrix(df, what):
    mx = df.groupby([what, "species"]).size().unstack()
    mx = mamls_mx.fillna(value = 0)
    mx = mx.drop("None", axis = 1)
    return mx

mamls_TY = make_matrix(mamls_df, "trap_year")
mamls_GY = make_matrix(mamls_df, "grid_year")
mamls_TC = make_matrix(mamls_df, "trap_census")
mamls_GC = make_matrix(mamls_df, "grid_census")

mamls_TY.to_csv("../Results/m_trap-year.csv")
mamls_GY.to_csv("../Results/m_grid-year.csv")
mamls_TC.to_csv("../Results/m_trap-census.csv")
mamls_GC.to_csv("../Results/m_grid_census.csv")

In [256]:
mamls_agb = mamls_df.groupby("plot_x").agb.describe()
mamls_agb.to_csv("../Results/mamls_agb.csv")

In [None]:
mamls_cen = census_diff(mamls_df)
mamls_cen.to_csv("../Results/mammals_census_dates.csv")

In [267]:
mamls_cn_diff = census_diff(mamls_df, "census")
mamls_yr_diff census_diff(mamls_df, "census")

Unnamed: 0,census,plot,min,max,mid,difference,diff_yrs,step
,012-10,D,2012-09-06,2012-09-06,2012-09-06,,,
D_012-10-012-12,012-12,D,2012-11-06,2012-11-06,2012-11-06,61.0,0.167123,012-10-012-12
,012-14,D,2012-06-13,2012-06-13,2012-06-13,,,
,2011-1,D,2011-08-20,2011-12-09,2011-10-14,,,
,2011-2,D,2011-08-21,2011-09-13,2011-09-01,,,
D_2011-2-2011-3,2011-3,D,2011-08-22,2011-09-14,2011-09-02,1.0,0.002740,2011-2-2011-3
D_2011-3-2011-4,2011-4,D,2011-08-23,2011-09-15,2011-09-03,1.0,0.002740,2011-3-2011-4
D_2011-4-2011-5,2011-5,D,2011-08-24,2011-09-16,2011-09-04,1.0,0.002740,2011-4-2011-5
,2011-6,D,2011-08-25,2011-08-25,2011-08-25,,,
D_2011-6-2011-7,2011-7,D,2011-08-26,2011-08-26,2011-08-26,1.0,0.002740,2011-6-2011-7


In [271]:
census_diff(mamls_df, "census")

Unnamed: 0,census,plot,min,max,mid,difference,diff_yrs,step
,012-10,D,2012-09-06,2012-09-06,2012-09-06,,,
D_012-10-012-12,012-12,D,2012-11-06,2012-11-06,2012-11-06,61.0,0.167123,012-10-012-12
,012-14,D,2012-06-13,2012-06-13,2012-06-13,,,
,2011-1,D,2011-08-20,2011-12-09,2011-10-14,,,
,2011-2,D,2011-08-21,2011-09-13,2011-09-01,,,
D_2011-2-2011-3,2011-3,D,2011-08-22,2011-09-14,2011-09-02,1.0,0.002740,2011-2-2011-3
D_2011-3-2011-4,2011-4,D,2011-08-23,2011-09-15,2011-09-03,1.0,0.002740,2011-3-2011-4
D_2011-4-2011-5,2011-5,D,2011-08-24,2011-09-16,2011-09-04,1.0,0.002740,2011-4-2011-5
,2011-6,D,2011-08-25,2011-08-25,2011-08-25,,,
D_2011-6-2011-7,2011-7,D,2011-08-26,2011-08-26,2011-08-26,1.0,0.002740,2011-6-2011-7
