In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
tqdm.pandas()

# FireRisk: continuous labels

In [2]:
# loading the dataset
def loading_the_data(data_dir):
    """Index a class-per-folder dataset as a DataFrame of paths and labels.

    ``data_dir`` is scanned for sub-folders; every entry found inside a
    sub-folder becomes one row whose label is that sub-folder's name.

    Parameters
    ----------
    data_dir : str or os.PathLike
        Root directory containing one sub-folder per class.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``filepaths`` (``data_dir``/class/entry joined with
        ``os.path.join``) and ``labels`` (the class folder name). Rows are
        ordered by class folder name, then by entry name.
    """
    filepaths = []
    labels = []

    # os.listdir gives no ordering guarantee; sorting keeps the row order
    # (and therefore the index) reproducible across machines and runs.
    for fold in sorted(os.listdir(data_dir)):
        foldpath = os.path.join(data_dir, fold)

        # Stray files sitting next to the class folders (README, Thumbs.db,
        # .DS_Store, ...) are not classes and would make os.listdir raise.
        if not os.path.isdir(foldpath):
            continue

        for file in sorted(os.listdir(foldpath)):
            filepaths.append(os.path.join(foldpath, file))
            labels.append(fold)

    return pd.DataFrame({'filepaths': filepaths, 'labels': labels})

In [3]:
# Training-set root, relative to the notebook's working directory.
# Built with os.path.join so the separator matches the host OS; the previous
# literal mixed an invalid escape sequence ('\F') with an escaped backslash.
# NOTE: the name shadows the built-in dir(); kept because later cells read it.
dir = os.path.join('..', 'FireRisk', 'train')

In [4]:
# Index every entry under the training root together with its class-folder name.
train_df = loading_the_data(data_dir=dir)
train_df

Unnamed: 0,filepaths,labels
0,G:\FireRisk\train\High\27032281_4_-103.4304412...,High
1,G:\FireRisk\train\High\27038991_4_-77.77273442...,High
2,G:\FireRisk\train\High\27040201_4_-73.83896834...,High
3,G:\FireRisk\train\High\27042071_4_-122.1662712...,High
4,G:\FireRisk\train\High\27042401_4_-121.1231610...,High
...,...,...
70326,G:\FireRisk\train\Water\35471591_7_-72.4088150...,Water
70327,G:\FireRisk\train\Water\35484351_7_-82.8478475...,Water
70328,G:\FireRisk\train\Water\35487101_7_-72.1588909...,Water
70329,G:\FireRisk\train\Water\35497331_7_-90.9452064...,Water


In [5]:
# Class-folder name -> integer class id.
label_mapping = {
    "Very_Low": 1,
    "Low": 2,
    "Moderate": 3,
    "High": 4,
    "Very_High": 5,
    "Non-burnable": 6,
    "Water": 7
}

# Guard against re-running the cell: once the column already holds class ids,
# mapping it again would look the ids up as folder names and turn every
# label into NaN.
if not train_df['labels'].isin(list(label_mapping.values())).all():
    encoded_labels = train_df['labels'].map(label_mapping)

    # .map() yields NaN for names missing from the dictionary; fail loudly
    # instead of carrying unlabeled rows forward.
    unknown_labels = train_df.loc[encoded_labels.isna(), 'labels'].unique()
    if len(unknown_labels) > 0:
        raise ValueError(
            f"Labels without an entry in label_mapping: {sorted(map(str, unknown_labels))}"
        )

    train_df['labels'] = encoded_labels.astype('int64')

train_df

Unnamed: 0,filepaths,labels
0,G:\FireRisk\train\High\27032281_4_-103.4304412...,4
1,G:\FireRisk\train\High\27038991_4_-77.77273442...,4
2,G:\FireRisk\train\High\27040201_4_-73.83896834...,4
3,G:\FireRisk\train\High\27042071_4_-122.1662712...,4
4,G:\FireRisk\train\High\27042401_4_-121.1231610...,4
...,...,...
70326,G:\FireRisk\train\Water\35471591_7_-72.4088150...,7
70327,G:\FireRisk\train\Water\35484351_7_-82.8478475...,7
70328,G:\FireRisk\train\Water\35487101_7_-72.1588909...,7
70329,G:\FireRisk\train\Water\35497331_7_-90.9452064...,7


In [7]:
# Load the merged table from disk; the path is relative to the notebook's
# working directory.
# NOTE(review): no visible cell writes this CSV (the execution counter jumps
# from 5 to 7) — presumably it was produced by a step that is no longer in the
# notebook; confirm its provenance so a fresh run can regenerate it.
merged_df = pd.read_csv("../conversion_cnt.csv")
merged_df

Unnamed: 0,filepaths,labels,xcoord,ycoord,index_right,longitude,latitude,label
0,G:\FireRisk\train\High\27032281_4_-103.4304412...,4,-103.430441,44.280426,22399775,-103.431216,44.279728,1237
1,G:\FireRisk\train\High\27038991_4_-77.77273442...,4,-77.772734,43.225073,28586043,-77.772401,43.224895,628
2,G:\FireRisk\train\High\27040201_4_-73.83896834...,4,-73.838968,42.608234,32216703,-73.840218,42.609576,720
3,G:\FireRisk\train\High\27042071_4_-122.1662712...,4,-122.166271,41.842358,36848757,-122.166224,41.841891,805
4,G:\FireRisk\train\High\27042401_4_-121.1231610...,4,-121.123161,42.054689,35525832,-121.123112,42.055788,1093
...,...,...,...,...,...,...,...,...
70326,G:\FireRisk\train\Water\35471591_7_-72.4088150...,7,-72.408815,40.992578,42161040,-72.407403,40.992165,0
70327,G:\FireRisk\train\Water\35484351_7_-82.8478475...,7,-82.847848,42.499614,32875582,-82.847320,42.498232,0
70328,G:\FireRisk\train\Water\35487101_7_-72.1588909...,7,-72.158891,40.942600,42464162,-72.158345,40.942353,0
70329,G:\FireRisk\train\Water\35497331_7_-90.9452064...,7,-90.945206,43.063911,29535000,-90.946095,43.063740,0


# Normalization

In [8]:
# One representative row per class id.
# GroupBy.first() returns the first *non-null value of each column*, so with
# missing data it can assemble a "row" out of several different records.
# Taking the first actual row of each class keeps every sample a real record.
sample_per_label = (
    merged_df
    .dropna(subset=['labels'])                       # groupby also ignored NaN keys
    .drop_duplicates(subset='labels', keep='first')  # first row seen for each class
    .sort_values('labels')                           # same ordering as sorted group keys
    .reset_index(drop=True)
)
# Keep the class id as the leading column, as groupby(...).reset_index() did.
sample_per_label = sample_per_label[
    ['labels'] + [col for col in sample_per_label.columns if col != 'labels']
]

labels_of_interest = [1, 2, 3, 4, 5, 6, 7]
filtered_samples = sample_per_label[sample_per_label['labels'].isin(labels_of_interest)]
filtered_samples

Unnamed: 0,labels,filepaths,xcoord,ycoord,index_right,longitude,latitude,label
0,1,G:\FireRisk\train\Very_Low\27032721_1_-101.940...,-101.940579,44.355358,21956069,-101.9398,44.35591,26
1,2,G:\FireRisk\train\Low\27032391_2_-103.05828990...,-103.05829,44.30072,22280313,-103.059095,44.300239,155
2,3,G:\FireRisk\train\Moderate\27033601_3_-98.9527...,-98.952796,44.455109,21375544,-98.954036,44.455533,314
3,4,G:\FireRisk\train\High\27032281_4_-103.4304412...,-103.430441,44.280426,22399775,-103.431216,44.279728,1237
4,5,G:\FireRisk\train\Very_High\27041631_5_-123.54...,-123.547052,41.5463,38711101,-123.546298,41.545952,4735
5,6,G:\FireRisk\train\Non-burnable\27033161_6_-100...,-100.447787,44.413602,21614844,-100.448383,44.414512,0
6,7,G:\FireRisk\train\Water\27035581_7_-92.2179351...,-92.217935,44.434161,21497779,-92.217754,44.435023,0


In [9]:
def normalize_continuous_label(label, breakpoints=(0, 61, 178, 489, 1985, 100000)):
    """Map a raw continuous label onto [0, 1] with piecewise-linear scaling.

    The range between consecutive ``breakpoints`` is stretched linearly onto
    an equal-width slice of [0, 1]; with the default six breakpoints each of
    the five bands covers a width of 0.2.
    NOTE(review): the default band edges presumably correspond to the
    boundaries between the five ordinal risk classes — confirm their origin.

    Parameters
    ----------
    label : int or float
        Raw value to normalize (a scalar, not an array).
    breakpoints : sequence of numbers, optional
        Strictly increasing band edges. Values at or below the first edge map
        to 0.0 and values above the last edge map to 1.0.

    Returns
    -------
    float
        Normalized value in [0, 1], or NaN when ``label`` is missing.
    """
    # A missing value fails every comparison below and would otherwise fall
    # through to the final return, i.e. be reported as the maximum (1).
    if pd.isna(label):
        return float('nan')

    if label <= breakpoints[0]:
        return 0.0

    n_bands = len(breakpoints) - 1
    for band, (lower, upper) in enumerate(zip(breakpoints[:-1], breakpoints[1:])):
        if label <= upper:
            # Position inside the band, scaled to the band width, plus the
            # band's starting offset (band / n_bands gives 0.2, 0.4, ... exactly
            # as the float literals would).
            return (label - lower) / (upper - lower) * (1 / n_bands) + band / n_bands

    return 1.0

# Normalize every raw 'label' value element-wise and store the result next to it
merged_df['normalized_label'] = merged_df['label'].map(normalize_continuous_label)
merged_df

Unnamed: 0,filepaths,labels,xcoord,ycoord,index_right,longitude,latitude,label,normalized_label
0,G:\FireRisk\train\High\27032281_4_-103.4304412...,4,-103.430441,44.280426,22399775,-103.431216,44.279728,1237,0.700000
1,G:\FireRisk\train\High\27038991_4_-77.77273442...,4,-77.772734,43.225073,28586043,-77.772401,43.224895,628,0.618583
2,G:\FireRisk\train\High\27040201_4_-73.83896834...,4,-73.838968,42.608234,32216703,-73.840218,42.609576,720,0.630882
3,G:\FireRisk\train\High\27042071_4_-122.1662712...,4,-122.166271,41.842358,36848757,-122.166224,41.841891,805,0.642246
4,G:\FireRisk\train\High\27042401_4_-121.1231610...,4,-121.123161,42.054689,35525832,-121.123112,42.055788,1093,0.680749
...,...,...,...,...,...,...,...,...,...
70326,G:\FireRisk\train\Water\35471591_7_-72.4088150...,7,-72.408815,40.992578,42161040,-72.407403,40.992165,0,0.000000
70327,G:\FireRisk\train\Water\35484351_7_-82.8478475...,7,-82.847848,42.499614,32875582,-82.847320,42.498232,0,0.000000
70328,G:\FireRisk\train\Water\35487101_7_-72.1588909...,7,-72.158891,40.942600,42464162,-72.158345,40.942353,0,0.000000
70329,G:\FireRisk\train\Water\35497331_7_-90.9452064...,7,-90.945206,43.063911,29535000,-90.946095,43.063740,0,0.000000


In [10]:
# One representative row per class id, now including the normalized value.
# GroupBy.first() picks the first *non-null value of each column*, which can
# pair a raw 'label' and a 'normalized_label' taken from different records;
# selecting the first actual row per class keeps the pair consistent.
sample_per_label = (
    merged_df
    .dropna(subset=['labels'])                       # groupby also ignored NaN keys
    .drop_duplicates(subset='labels', keep='first')  # first row seen for each class
    .sort_values('labels')                           # same ordering as sorted group keys
    .reset_index(drop=True)
)
# Keep the class id as the leading column, as groupby(...).reset_index() did.
sample_per_label = sample_per_label[
    ['labels'] + [col for col in sample_per_label.columns if col != 'labels']
]

labels_of_interest = [1, 2, 3, 4, 5, 6, 7]
# Class id, raw continuous label and its normalized counterpart, side by side.
filtered_samples = sample_per_label.loc[
    sample_per_label['labels'].isin(labels_of_interest),
    ['labels', 'label', 'normalized_label'],
]
filtered_samples

Unnamed: 0,labels,label,normalized_label
0,1,26,0.085246
1,2,155,0.360684
2,3,314,0.48746
3,4,1237,0.7
4,5,4735,0.805611
5,6,0,0.0
6,7,0,0.0
