# Data manipulation

In [62]:
import pandas as pd
import math

## 0.0 Start Dataframe

In [168]:
df = pd.read_excel(r"../data/nh_data_oct_18.xlsx", sheet_name="group2")

In [188]:
total_points = df['Point'].max() # total number of points on the line, should be 10
total_quadrants = total_points *4 # total number of quadrants/plants surveyed, should be 40

## 1. Calculate areas covered & average distance from point

In [194]:
df['Basal area covered/centimetre^2'] = (df['Diametre/centimetre'] / 2) **2 * math.pi
df.head()

Unnamed: 0,Point,Quad No.,Species,Common name,Diametre/centimetre,Point to plant/metre,Basal area covered/centimetre^2
0,1,1,,Eastern white pine,50.5,4.0,2002.961666
1,1,2,,Eastern hemlock,46.6,3.0,1705.539236
2,1,3,,Yellow birch,21.3,4.25,356.327293
3,1,4,,Eastern hemlock,22.5,4.3,397.60782
4,2,1,,Eastern hemlock,14.4,5.15,162.860163


In [121]:
avg_distance = df['Point to plant/metre'].sum()/total_points
avg_distance # in metre

14.834

## 2. Data for number of individuals by species

In [122]:
number_species = df['Common name'].value_counts().to_frame().reset_index()
number_species.rename(columns={'index': 'Common name', 'Common name':'Species head count'})

Unnamed: 0,Common name,Species head count
0,Sugar maple,12
1,Eastern white pine,10
2,Eastern hemlock,7
3,Northern red oak,5
4,Yellow birch,3
5,Green ash,3


## 3. Coverage area by species

In [171]:
species_areas = df.groupby('Common name').sum('Basal area covered/centimetre^2').reset_index()

In [174]:
species_areas.drop(['Point', 'Species', 'Quad No.', 'Diametre/centimetre', 'Point to plant/metre'], axis = 1)

Unnamed: 0,Common name,Basal area covered/centimetre^2
0,Eastern hemlock,3667.44814
1,Eastern white pine,28392.70124
2,Green ash,885.073044
3,Northern red oak,2669.026433
4,Sugar maple,3332.789982
5,Yellow birch,984.685093


## 4. Calculations for Point-Quarter Sampling

In [182]:
point_quarter_sampling = pd.DataFrame()
point_quarter_sampling['Common name'] = number_species['index']


point_quarter_sampling

Unnamed: 0,Common name
0,Sugar maple
1,Eastern white pine
2,Eastern hemlock
3,Northern red oak
4,Yellow birch
5,Green ash


### Total Density

In [189]:
td = 1/((avg_distance/total_quadrants) ** 2)
td # 

7.27115551331095

### Relative density

In [184]:
point_quarter_sampling['Relative density'] = number_species['Common name']/ total_quadrants

In [185]:
point_quarter_sampling

Unnamed: 0,Common name,Relative density
0,Sugar maple,0.3
1,Eastern white pine,0.25
2,Eastern hemlock,0.175
3,Northern red oak,0.125
4,Yellow birch,0.075
5,Green ash,0.075


### Absolute density

In [186]:
point_quarter_sampling['Absolute density'] = point_quarter_sampling['Relative density'] * td

In [187]:
point_quarter_sampling

Unnamed: 0,Common name,Relative density,Absolute density
0,Sugar maple,0.3,2.181347
1,Eastern white pine,0.25,1.817789
2,Eastern hemlock,0.175,1.272452
3,Northern red oak,0.125,0.908894
4,Yellow birch,0.075,0.545337
5,Green ash,0.075,0.545337


### Frequency

In [195]:
point_quarter_sampling['Frequency'] = number_species['Common name'] / total_points

### Relative frequency

In [196]:
point_quarter_sampling['Relative frequency'] = point_quarter_sampling['Frequency'] / point_quarter_sampling['Frequency'].sum()

### Coverage

In [190]:
point_quarter_sampling['Coverage'] = species_areas['Basal area covered/centimetre^2'] * point_quarter_sampling['Absolute density'] / number_species['Common name']

### Relative coverage

In [191]:
point_quarter_sampling['Relative coverage'] = point_quarter_sampling['Coverage']/point_quarter_sampling['Coverage'].sum()

### Importance value

In [197]:
point_quarter_sampling['Importance value'] = point_quarter_sampling['Relative density'] + point_quarter_sampling['Relative frequency'] + point_quarter_sampling['Relative coverage']