# Normalize Data

#### TOC
- [Read DataFrame from the CSV file](#Read-DataFrame-from-the-CSV-file)

In [1]:
using DataFrames, CSV

# Read DataFrame from the CSV file

In [2]:
df = CSV.read("market_indicators.csv")
last(df, 20)

│   caller = read(::String) at CSV.jl:40
└ @ CSV /Users/uki/.julia/packages/CSV/MKemC/src/CSV.jl:40


Unnamed: 0_level_0,Day,Date,ISM Manufacturing,GDP q/q,S&P500,S&P500 y/y,S&P500 m/m
Unnamed: 0_level_1,Int64,Date…,Float64,Float64,Float64?,String?,String?
1,25963,2019-05-01,51.6,0.031,2941.76,8.22%,6.89%
2,25993,2019-05-31,51.6,0.031,2941.76,8.22%,missing
3,26023,2019-06-30,51.6,0.02,2941.76,8.22%,missing
4,26024,2019-07-01,51.3,0.02,2980.38,5.83%,1.31%
5,26054,2019-07-31,51.3,0.02,2980.38,5.83%,missing
6,26055,2019-08-01,48.8,0.02,2926.46,0.86%,-1.81%
7,26085,2019-08-31,48.8,0.02,2926.46,0.86%,missing
8,26086,2019-09-01,48.2,0.02,2976.74,2.15%,1.72%
9,26115,2019-09-30,48.2,0.021,2976.74,2.15%,missing
10,26116,2019-10-01,48.5,0.021,3037.56,12.02%,2.04%


## Column Numbers

In [None]:
column_rata_die = 1
column_date = 2
column_ISMM = 3
column_GPD_qq = 4
column_SP500 = 5
column_SP_yy = 6

In [None]:
a = df[:, column_GPD_qq]
a[1]

# Function to normalize the data

- takes a column (an array) of data as an input
- calculates minimum value of the column
- calculates maximum value of the column
- for each row it calculates:

$$ normalized_i = \frac{(value_i - min)}{(max - min)} $$

In [None]:
gdp = df[:, column_GPD_qq]
items = length(gdp)

In [None]:
original_column = 4

function normalize(df, original_column)
    original = df[:, original_column]
    items = length(original)
    normalized = zeros(Float64, items, 1) # type, rows, columns -- items×1 Array{Float64,2}
    
    min = minimum(original)
    max = maximum(original)   

    for i in 1:items
        value = (original[i] - min) / (max - min) # normalization formula 0.0 to 1.0
        value = value * 200                    # 0.0 to 200
        value = value - 100                    # -100 to 100
        normalized[i] = round(value, digits=2) #0.00
    end
    
    return normalized
end


gdp_normalized = normalize(gdp, original_column)

- https://docs.juliaplots.org/latest/layouts/

In [None]:
using Plots
gr()
plot(df[:,2], 
    [df[:,4]  gdp_normalized  ], 
    label    = ["IMS" "IMS normalized"  "GDP" "GDP normalized"],
    xlabel   = "time",
    ylabel   = "indicators",
    size     = (1450, 600), # width, height
    layout = (2, 1)
    )

In [None]:
using Plots
gr()
plot(df[:,2], 
    [ ims_normalized  gdp_normalized  ], 
    label    = [  "IMS normalized" "GDP normalized"],
    xlabel   = "time",
    ylabel   = "indicators",
    size     = (1450, 600), # width, height
    #, layout = (2, 1)
    )