# Analiza protestów i zamieszek w Indiach w latach 2016-2022 na podstawie bazy danych

### Import potrzebnych bibliotek i modułów

In [None]:
using Dates

In [None]:
import Pkg

In [None]:
"""
    import_missing_libraries(lib_names)

Make sure every package named in `lib_names` can be loaded, installing it with
`Pkg.add` when it cannot be found, and then bring it into scope with `using`.

# Arguments
- `lib_names`: package names, e.g. `["Plots", "VegaLite"]`.

# Returns
`nothing`.
"""
function import_missing_libraries(lib_names::AbstractVector{<:AbstractString})
    for lib in lib_names
        name = String(lib)
        # `Base.find_package` returns `nothing` when the package cannot be located.
        # This replaces matching the human-readable text printed by `Pkg.status`,
        # which is not a stable interface.
        if Base.find_package(name) === nothing
            println("Nie znaleziono pakietu: "*name)
            println("Próba dodania brakującego pakietu")
            Pkg.add(name)
        end
        # `using` is only valid at top level, hence the `@eval`.
        @eval using $(Symbol(name))
    end
    return nothing
end;

In [None]:
# Install (when missing) and load the packages used by the rest of the notebook.
# NOTE(review): CSV, DataFrame, countmap, loess, fit_mle etc. are used below without an
# explicit `using`; presumably they are re-exported by StatsKit — confirm.
import_missing_libraries(["Plots", "StatsPlots","StatsKit","VegaLite","PlotlyJS","StateSpaceModels"]);

## Pierwsza prezentacja

Zbiór świąt

In [None]:
# Holiday dates written as "day Month year" strings.
# They are compared verbatim with `df.event_date` in `holiday!` and parsed with
# dateformat"d U y" in `radius_of_days`, so the spelling must match both uses.
holidays_set = ["26 January 2016","26 January 2017","26 January 2018","26 January 2019","26 January 2020","26 January 2021",
        "26 January 2022","15 August 2016","15 August 2017","15 August 2018","15 August 2019","15 August 2020","15 August 2021",
        "02 October 2019","02 October 2016","02 October 2017","02 October 2018","02 October 2020","02 October 2021",
        "11 March 2021","29 March 2021","21 April 2021","25 April 2021","26 May 2021","19 July 2021","12 August 2021",
        "30 August 2021","15 October 2021","18 November 2021","04 November 2021","21 February 2020","10 March 2020",
        "02 April 2020","06 April 2020","07 May 2020","30 July 2020","18 September 2020","30 August 2020","25 October 2020",
        "14 November 2020","30 November 2020","04 March 2019", "21 March 2019","14 April 2019","17 April 2019","18 May 2019",
        "11 August 2019","24 August 2019","31 August 2019","08 October 2019","27 October 2019","12 November 2019",
        "13 February 2018","02 March 2018","25 March 2018","29 March 2018","30 April 2018","22 August 2018","21 September 2018",
        "03 August 2018","19 October 2018", "07 November 2018","23 November 2018","24 February 2017","13 March 2017",
        "04 April 2017","09 April 2017","10 May 2017","02 September 2017","21 September 2017","14 August 2017","30 August 2017",
        "19 October 2017","04 November 2017","07 March 2016", "24 March 2016","15 April 2016","19 April 2016","21 May 2016",
        "13 September 2016","12 October 2016","25 August 2016","11 October 2016","30 October 2016","14 November 2016"];

Odczyt bazy danych i wstępne filtrowanie

In [None]:
# Load the event table, keeping only the columns used in the analysis below.
df = CSV.read("./data/2016-2022.csv", DataFrame; select = [
    "year", "event_type", "sub_event_type", "longitude", "latitude", "fatalities",
    "location", "event_date", "admin1", "admin2", "time_precision", "notes",
    "interaction",
])

In [None]:
"""
    date_conversion!(df::DataFrame, kind::String)

Parse `df.event_date` (strings in `"d U y"` format, e.g. `"26 January 2016"`) and add
one derived column to `df`, chosen by `kind`:

- `"month"`       adds column `"month"` with the month name,
- `"day_of_week"` adds column `"DayOfWeek"` with the weekday name,
- `"day"`         adds column `"day"` with the day of the month.

# Returns
The mutated `df`.

# Throws
`ArgumentError` when `kind` is none of the three values above.
"""
function date_conversion!(df::DataFrame, kind::String)
    # Reject an unsupported `kind` before doing any parsing work. `ArgumentError`
    # accepts a single message argument, so the offending value is interpolated.
    kind in ("month", "day_of_week", "day") || throw(ArgumentError(
        "kind must be either \"month\", \"day_of_week\", or \"day\"; got \"$kind\"",
    ))
    dates = Dates.Date.(df.event_date, dateformat"d U y")
    if kind == "month"
        df[!, "month"] = Dates.monthname.(dates)
    elseif kind == "day_of_week"
        df[!, "DayOfWeek"] = Dates.dayname.(dates)
    else
        df[!, "day"] = Dates.day.(dates)
    end
    return df
end;

In [None]:
# Add the "month", "DayOfWeek" and "day" columns to `df`, one conversion per kind.
for kind in ("month", "day_of_week", "day")
    date_conversion!(df, kind)
end

In [None]:
"""
    filter_data(data::DataFrame, param::Symbol, value)

Return a new data frame holding the rows of `data` whose column `param` equals `value`.
"""
function filter_data(data::DataFrame, param::Symbol, value)
    matches_value(column) = column .== value
    return subset(data, param => matches_value)
end;

In [None]:
# Bar chart of the number of rows per weekday; the bars are ordered Monday to Sunday
# through the explicit "sort" list and coloured by weekday (legend hidden).
select(df,[:DayOfWeek]) |> @vlplot(
    :bar,  
    x = {:DayOfWeek, title = "Day of Week","sort" = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]},
    y = {"count()", axis = {title = "Number of protests"}},
    color = {:DayOfWeek, legend = false},
    title = "By Day of Week"
    )

In [None]:
# Bar chart of the number of rows per month; the explicit "sort" list keeps the bars
# in calendar order.
select(df,[:month]) |> @vlplot(
    :bar,  
    x = {:month, title = "Months", "sort" = ["January","February","March","April","May","June","July", "August","September","October", "November","December" ]},
    y = {"count()",title = "Number of protests"},
     color = {:month, legend = false},
    title = "By Month"
    )

In [None]:
"""
    add_seasons!(data::DataFrame)

Add a `"Seasons"` column to `data`, derived from the English month names in
`data.month`: December to February is "Winter", March to May "Spring", June to August
"Summer" and September to November "Autumn".

# Returns
The mutated `data`.

# Throws
`ArgumentError` when a value of `data.month` is not an English month name. Without
this check the new column would come out shorter than the table.
"""
function add_seasons!(data::DataFrame)
    season_by_month = Dict(
        "December" => "Winter", "January" => "Winter", "February" => "Winter",
        "March" => "Spring", "April" => "Spring", "May" => "Spring",
        "June" => "Summer", "July" => "Summer", "August" => "Summer",
        "September" => "Autumn", "October" => "Autumn", "November" => "Autumn",
    )
    # A typed vector gives the column a concrete String element type instead of Any.
    seasons = String[]
    sizehint!(seasons, length(data.month))
    for month in data.month
        season = get(season_by_month, month, nothing)
        season === nothing &&
            throw(ArgumentError("unrecognised month name: $(repr(month))"))
        push!(seasons, season)
    end
    data[!, "Seasons"] = seasons
    return data
end;

In [None]:
# Add the "Seasons" column to `df` (needs the "month" column created above).
add_seasons!(df)

In [None]:
# Bar chart of the number of rows per season, ordered Winter, Spring, Summer, Autumn.
@vlplot(
    :bar,  
    data = select(df,[:Seasons]),
    x = {:Seasons, title = "Seasons", sort = ["Winter", "Spring", "Summer", "Autumn" ]},
    y = {"count()",title = "Number of protests"},
     color = {:Seasons, legend = false},
    title = "By Season",
    width=600,
    height=600
    )

In [None]:
"""
    holiday!(df::DataFrame, dates::Vector{String})

Add a Boolean `"Holiday"` column to `df` that is `true` for every row whose
`event_date` string occurs in `dates`. Returns the vector of flags that was stored.
"""
function holiday!(df::DataFrame, dates::Vector{String})
    # `Ref` makes broadcasting treat `dates` as one collection to search.
    flags = in.(df.event_date, Ref(dates))
    df[!, "Holiday"] = flags
    return flags
end;

In [None]:
# Keep the 2019 rows and flag those whose event date is listed in `holidays_set`.
data_2019 = filter_data(df,:year,2019);
holiday!(data_2019, holidays_set);

In [None]:
# my_month(df, month)
#
# Return a bar chart with the number of rows per day of the month for the rows of `df`
# whose `month` column equals `month`; bars are coloured by the `Holiday` column.
# `df` therefore needs the `day`, `month`, `year` and `Holiday` columns.
function my_month(df, month)
    # Rebinds the local `df` to the filtered copy; the caller's table is untouched.
    df = filter_data(df, :month, month)
    plot = @vlplot(
    :bar,  
    x = {df.day, title = "Days"},
    y = {"count()",title = "Number of Protests"},
    # The year in the title is read from the first filtered row, so this indexing
    # fails when no row matches `month`.
    title = month*" in "*string(df.year[1]),
    color = {df.Holiday, legend = false},
    )
    return plot
end;

In [None]:
# Daily counts for April 2019, with holidays highlighted by colour.
my_month(data_2019,"April")

In [None]:
"""
    df_(data, year)

Return the rows of `data` recorded in `year`, reduced to the columns `event_type`,
`latitude`, `longitude`, `year` and, when `data` has it, `month`.
"""
function df_(data, year)
    wanted = [:event_type, :latitude, :longitude, :year]
    if "month" in names(data)
        push!(wanted, :month)
    end
    picked = select(data, wanted)
    return subset(picked, :year => y -> y .== year)
end;

In [None]:
"""
    getIDs_(input::String)

Translate the name of an Indian state or union territory into its two-letter ID.

# Throws
`KeyError` when `input` is not one of the names in the table.
"""
function getIDs_(input::String)
    translation = Dict(
        "Andaman and Nicobar Islands"              => "AN",
        "Andhra Pradesh"                           => "AP",
        "Arunachal Pradesh"                        => "AR",
        "Assam"                                    => "AS",
        "Bihar"                                    => "BR",
        "Chandigarh"                               => "CH",
        # Was "CG" here while the Vector{String} method of `getIDs_` uses "CT"; the
        # two methods must agree. NOTE(review): confirm "CT" against the feature ids
        # in the map file.
        "Chhattisgarh"                             => "CT",
        "Dadra and Nagar Haveli and Daman and Diu" => "DH",
        "Delhi"                                    => "DL",
        "Goa"                                      => "GA",
        "Gujarat"                                  => "GJ",
        "Haryana"                                  => "HR",
        "Himachal Pradesh"                         => "HP",
        "Jammu and Kashmir"                        => "JK",
        "Jharkhand"                                => "JH",
        "Karnataka"                                => "KA",
        "Kerala"                                   => "KL",
        "Ladakh"                                   => "LA",
        "Lakshadweep"                              => "LD",
        "Madhya Pradesh"                           => "MP",
        "Maharashtra"                              => "MH",
        "Manipur"                                  => "MN",
        "Meghalaya"                                => "ML",
        "Mizoram"                                  => "MZ",
        "Nagaland"                                 => "NL",
        "Odisha"                                   => "OR",
        "Puducherry"                               => "PY",
        "Punjab"                                   => "PB",
        "Rajasthan"                                => "RJ",
        "Sikkim"                                   => "SK",
        "Tamil Nadu"                               => "TN",
        "Telangana"                                => "TG",
        "Tripura"                                  => "TR",
        "Uttar Pradesh"                            => "UP",
        "Uttarakhand"                              => "UL",
        "West Bengal"                              => "WB",
    )
    return translation[input]
end

In [None]:
"""
    getIDs_(input::Vector{String})

Translate each state or union-territory name in `input` into its two-letter ID,
keeping the order of `input`. Throws `KeyError` for a name missing from the table.
"""
function getIDs_(input::Vector{String})
    id_by_state = Dict(
        "Andaman and Nicobar Islands"              => "AN",
        "Andhra Pradesh"                           => "AP",
        "Arunachal Pradesh"                        => "AR",
        "Assam"                                    => "AS",
        "Bihar"                                    => "BR",
        "Chandigarh"                               => "CH",
        "Chhattisgarh"                             => "CT",
        "Dadra and Nagar Haveli and Daman and Diu" => "DH",
        "Delhi"                                    => "DL",
        "Goa"                                      => "GA",
        "Gujarat"                                  => "GJ",
        "Haryana"                                  => "HR",
        "Himachal Pradesh"                         => "HP",
        "Jammu and Kashmir"                        => "JK",
        "Jharkhand"                                => "JH",
        "Karnataka"                                => "KA",
        "Kerala"                                   => "KL",
        "Ladakh"                                   => "LA",
        "Lakshadweep"                              => "LD",
        "Madhya Pradesh"                           => "MP",
        "Maharashtra"                              => "MH",
        "Manipur"                                  => "MN",
        "Meghalaya"                                => "ML",
        "Mizoram"                                  => "MZ",
        "Nagaland"                                 => "NL",
        "Odisha"                                   => "OR",
        "Puducherry"                               => "PY",
        "Punjab"                                   => "PB",
        "Rajasthan"                                => "RJ",
        "Sikkim"                                   => "SK",
        "Tamil Nadu"                               => "TN",
        "Telangana"                                => "TG",
        "Tripura"                                  => "TR",
        "Uttar Pradesh"                            => "UP",
        "Uttarakhand"                              => "UL",
        "West Bengal"                              => "WB",
    )
    return [id_by_state[state] for state in input]
end

In [None]:
"""
    Counter_of_protests(df::DataFrame)

Count the rows of `df` per value of `admin1` and return a data frame with the columns
`ID_admin1` (two-letter ID from `getIDs_`), `n` (row count) and `state` (the `admin1`
name). Row order follows the iteration order of the count dictionary.
"""
function Counter_of_protests(df::DataFrame)
    counter = countmap(df.admin1)
    keys_in_order = collect(keys(counter))
    # Convert to plain `String` so `getIDs_(::Vector{String})` applies even when the
    # column holds another string type.
    states = String.(keys_in_order)
    counts = [counter[key] for key in keys_in_order]
    return DataFrame(ID_admin1 = getIDs_(states), n = counts, state = states)
end

Tworzenie różnych wykresów słupkowych

In [None]:
# Bar chart: number of events per year, coloured by event type.
@vlplot(
    height = 500,   # was misspelled `heigh`, so the requested height was not applied
    width = 300,
    :bar,
    data = select(df, [:year, :event_type]),
    x = {
        :year,
        title = "Lata"
    },
    y = {
        "count()",
        title = "Ilość występowania protestów"
    },
    color = {
        :event_type,
        legend = {
            title = "Rodzaj konfilktu"
        }
    }
)

In [None]:
# Bar chart: number of events per month of 2016, coloured by event type.
@vlplot(
    height = 500,   # was misspelled `heigh`, so the requested height was not applied
    width = 300,
    :bar,
    data = select(df_(df, 2016), [:month, :event_type]),
    x = {
        :month,
        title = "Miesiące",
        # Calendar order, consistent with the "By Month" chart; without an explicit
        # sort the month names are not shown chronologically.
        sort = ["January", "February", "March", "April", "May", "June", "July",
                "August", "September", "October", "November", "December"]
    },
    y = {
        "count()",
        title = "Ilość występowania protestów"
    },
    color = {
        :event_type,
        legend = {
            title = "Rodzaj konfilktu"
        }
    },
    title = "Badanie występowania protestów w roku 2016"
)

Tworzenie map Indii, przedstawianie na nich danych

a) punktowe rozmieszczenie konfliktów na mapie

In [None]:
# Read the map file as raw text; the plots below pass it on as TopoJSON
# (format type `:topojson`, feature `:states`).
topo_json = read("./data/mapsindia1.json", String);

In [None]:
# Layered map: grey state outlines from `topo_json` with one circle per row of `df`,
# positioned by longitude/latitude and coloured by event type.
@vlplot(width=500, height=400) +
@vlplot(
    data={
        values=topo_json,
        format={
            type=:topojson,
            feature=:states
        }
    },
    mark={
        :geoshape,
        stroke=:white,
        strokeWidth=2
    },
    color={value="#bebebe"}
) +
@vlplot(
	:circle,
    data       = select(df,[:longitude,:latitude,:event_type]),
    projection = {type=:equalEarth},
    longitude  = :longitude,
    latitude   = :latitude,
    color      = { 
		field  = :event_type,            # change the colour field to show a different attribute on the map
	    title  = "Rodzaje konfliktów",
	},
    title      = "Mapa występowania konfliktów",
)

In [None]:
# Same layered map as above, restricted to the rows of 2017.
@vlplot(width=500, height=400) +
@vlplot(
    data={
        values=topo_json,
        format={
            type=:topojson,
            feature=:states
        }
    },
    mark={
        :geoshape,
        stroke=:white,
        strokeWidth=2
    },
    color={value="#bebebe"}
) +
@vlplot(
	:circle,
    data       = select(df_(df, 2017),[:longitude,:latitude,:event_type]), # df_ extracts the rows of a single year
    projection = {type=:equalEarth},
    longitude  = "longitude",
    latitude   = "latitude",
    color      = { 
		field  = "event_type",            # change the colour field to show a different attribute on the map
	    title  = "Rodzaje konfliktów",
	},
    title      = "Mapa występowania konfliktów",
)

b) Tworzenie heatmapy rozmieszczenia konfliktów

In [None]:
"""
    Count_of_protests(df::DataFrame)

Count the rows of `df` per value of `admin1` and return a data frame with the columns
`ID_admin1` (two-letter ID from `getIDs_`), `n` (row count) and `state` (the `admin1`
name). Row order follows the iteration order of the count dictionary.
"""
function Count_of_protests(df::DataFrame)
    counter = countmap(df.admin1)
    keys_in_order = collect(keys(counter))
    # Convert to plain `String` so `getIDs_(::Vector{String})` applies even when the
    # column holds another string type.
    states = String.(keys_in_order)
    counts = [counter[key] for key in keys_in_order]
    return DataFrame(ID_admin1 = getIDs_(states), n = counts, state = states)
end;

In [None]:
# Choropleth: each state shape from `topo_json` is joined (lookup on the shape `id`
# against `ID_admin1`) with the per-state row count `n`, exposed as `admin`, and
# coloured by that value.
@vlplot(
	height = 700,
	width = 900 ) +
@vlplot(
	data = {
	    values = topo_json,
        format = {
		    type=:topojson,
            feature=:states
		}
    },
    mark = {
	    :geoshape,
		stroke=:white,
		strokeWidth=2
    },
    transform=[{
        lookup=:id,
        from={
            data=sort!(Count_of_protests(df), :state),
            key=:ID_admin1,
            fields=["n"]
		},
	as=:admin
    }],
    projection = {type=:equalEarth},
	color = {
	   :admin,
	   type=:quantitative,
       title = "Natężenie zdarzeń"
		},
    title = "Mapa przedstawiająca ilość konfliktów w danym rejonie"
) 

In [None]:
# Rows whose `interaction` code is one of 15, 16, 17, 35, 36 or 37.
# NOTE(review): the plot titles below describe these as conflicts involving government
# bodies; confirm the meaning of the codes against the data set's codebook.
df_cv = subset(
    select(df, [:year, :event_date, :interaction, :sub_event_type, :latitude, :longitude]),
    :interaction => codes -> in.(codes, Ref([15, 16, 17, 35, 36, 37])),
);

In [None]:
# Sorted distinct sub-event types; used as the colour-scale domain in the plots below.
dom = sort(unique(df.sub_event_type));

In [None]:
# Hex colours used as the colour-scale range, paired with `dom` in the plots below.
colors = ["#069668", "#f90da0", "#35e0a9", "#c6523e", "#75d5e1", "#eb1138", "#347383", "#ccb0cb", "#476af9", "#e1c637", "#803ef3", "#f6a679", "#6e6494", "#fd95e8", "#926026"];

In [None]:
# Layered map: grey state outlines with one small circle per row of `df_cv`, coloured
# by sub-event type using the fixed `dom` / `colors` scale (legend hidden).
@vlplot(
    width = 900, 
    height= 700
) +
@vlplot(
    data = {
        values = topo_json,
        format = {
            type=:topojson,
            feature=:states
        }
    },
    mark = {
        :geoshape,
        stroke=:white,
        strokeWidth=2
    },
    color = { value="#bebebe" },
) +
@vlplot(
    mark = {
		:circle,
		size = 12,
	},
    data=df_cv,
    projection = { type=:equalEarth },
    longitude  = :longitude,
    latitude   = :latitude,
    color      = { 
		:sub_event_type,
		scale={
            domain=dom,
            range=colors
		}, 
		title  = "Subevent type",
		legend = false,
	},
	title      = "Mapa konfliktów z udziałem organów rządowych",
)

In [None]:
# Bar chart: number of rows per year (years before 2022), coloured by sub-event type
# with the same `dom` / `colors` scale as the map above.
# NOTE(review): the title refers to conflicts involving government bodies, but the data
# is the full `df`, not the `df_cv` subset used by the map with the same wording —
# confirm which table is intended.
@vlplot(
	width = 900, 
	height= 700
) +
@vlplot(
	:bar,
	data  = filter(:year => yr -> yr<2022  , df),
	x  = {
		:year,
		title = "Rok"
	},
	y  = {
		"count()",
		title = "Ilość zdarzeń"
	},
	color = {
		:sub_event_type,
		scale={
            domain=dom,
            range=colors
		}, 
		title="Rodzaj konfilktu"
	},
	title = "Ilość konfliktów z udziałem organów rządowych",
)

## Druga prezentacja

In [None]:
"""
    by_day(data::DataFrame, day::Int, month::String)

Return a bar chart with the number of rows per year for the given calendar day, i.e.
the rows of `data` whose `day` column equals `day` and whose `month` column equals
`month`.
"""
function by_day(data::DataFrame, day::Int, month::String)
    matching = filter_data(filter_data(data, :day, day), :month, month)
    # The title is built from the arguments. Every filtered row carries exactly these
    # values, and reading them from the first row raised a BoundsError when no row
    # matched.
    chart_title = string(day)*" "*month
    plot = @vlplot(
        :bar,
        title = chart_title,
        x = {matching.year, type = :ordinal, title = "Years"},
        y = {"count()", title = "Protests' amount"}
    )
    return plot
end;

In [None]:
# Yearly counts for 2 October.
by_day(df, 02, "October")

In [None]:
"""
    by_month(data::DataFrame, month::String)

Return a bar chart with the number of rows per year for the rows of `data` whose
`month` column equals `month`.
"""
function by_month(data::DataFrame, month::String)
    matching = filter_data(data, :month, month)
    plot = @vlplot(
        :bar,
        # Every filtered row carries `month`; reading it from the first row raised a
        # BoundsError when no row matched.
        title = month,
        x = {matching.year, type = :ordinal, title = "Years"},
        y = {"count()", title = "Protests' amount"}
    )
    return plot
end;

In [None]:
# Yearly counts for April.
by_month(df,"April")

In [None]:
"""
    radius_of_days(df::DataFrame, date::String, R::Int)

Among the days `date - R, ..., date + R`, find the one with the most rows in `df` and
return its offset from `date` in days (negative means before `date`).

Ties are resolved in favour of the earliest day. `0` is returned when no row falls
inside the window or when `R` is negative.

`date` and `df.event_date` are parsed with the format `"d U y"`. As a side effect the
parsed dates are stored in `df` as column `"formal_date"`.
"""
function radius_of_days(df::DataFrame, date::String, R::Int)
    fmt = dateformat"d U y"
    df[!, "formal_date"] = Dates.Date.(df.event_date, fmt)
    R < 0 && return 0
    center = Dates.Date(date, fmt)

    # Count the rows per offset in one pass instead of filtering the table once per
    # offset. Slot `offset + R + 1` holds the count for `center + offset` days.
    counts = zeros(Int, 2R + 1)
    for event_day in df.formal_date
        offset = Dates.value(event_day - center)
        if -R <= offset <= R
            counts[offset + R + 1] += 1
        end
    end

    # The strict comparison keeps the earliest offset among equal maxima.
    best_count = 0
    best_offset = 0
    for (slot, n) in enumerate(counts)
        if n > best_count
            best_count = n
            best_offset = slot - R - 1
        end
    end
    return best_offset
end


In [None]:
# For every holiday, the offset (within ±2 days) of the day with the most events.
# A typed comprehension replaces the untyped `[]` accumulator, so the vector has a
# concrete Int element type.
r_average_list = Int[radius_of_days(df, holiday, 2) for holiday in holidays_set]
print(r_average_list)
mean(r_average_list)

In [None]:
# How often each offset was the busiest day around a holiday.
countmap(r_average_list)

In [None]:
# Number of rows per `time_precision` value.
countmap(df.time_precision)

In [None]:
"""
    srednia_tp(data, month)

For the rows of `data` whose `month` column equals `month`, return one row per day
with the columns `day`, `nrow` (number of events that day), `time_precision_mean`
(mean of `time_precision` that day) and `month`.
"""
function srednia_tp(data, month)
    in_month = filter_data(data, :month, month)
    per_day = groupby(in_month, :day)
    # `:month => first` keeps the result at one row per day. Passing the bare `:month`
    # column made `combine` return one row per event, with the per-day aggregates
    # repeated on each of them.
    return combine(per_day, nrow, :time_precision => mean, :month => first => :month)
end

In [None]:
# Per-day event count and mean time precision for October 2019.
data_2019_tp = srednia_tp(data_2019,"October")

In [None]:
# Bar chart of the mean time precision per day; the title is built from the first
# `month` value of `data_2019_tp` and the first `year` value of `data_2019`.
select(data_2019_tp,[:day, :time_precision_mean]) |> @vlplot(
    :bar, 
    width = 600,
    x = {:day, title = "Days"},
    y = {:time_precision_mean, title = "Time Precision"},
    title = "Level of time precision during " *(data_2019_tp.month[1])*" "*string(data_2019.year[1]),
)

Próba korelacji poparcia procentowego z ilością protestów w danym stanie

In [None]:
# Columns needed for the per-state analyses below.
datas = select(df, [:admin1, :admin2, :year, :notes, :event_type])

# Rows of 2019 only.
df_percent = datas[datas.year .== 2019, :];

In [None]:
# Number of 2019 rows per state, followed by the state names in their row order.
df_per_col = combine(groupby(df_percent, :admin1), nrow => :num_of_protests)
df_per_col[!, "admin1"]

In [None]:
# Hand-entered percentages, one per row of `df_per_col`; `missing` marks states without
# a value. The scatter plot below labels them as the ruling party's vote share.
# NOTE(review): the values are matched to states purely by row position — confirm the
# order against `df_per_col.admin1`.
percentage_votes = [45, 50, 54, 68, 2, 28, 25, 37, 55, 2, 55, 30, 52, 50, 70, 20, 50, 38, 65, 45, 58, 54, 46, 57, 5, 52, 33, 20, 64, missing, 54, 4, missing, missing, missing]

In [None]:
# Attach the percentages as a new column (assigned by position).
df_per_col[!, "percentage_votes"] = percentage_votes

In [None]:
# Drop the states without a percentage and pull out the two series to compare.
df_per_col_1 = dropmissing(df_per_col)
x = df_per_col_1[!, "percentage_votes"]
y = df_per_col_1[!, "num_of_protests"]

In [None]:
# Scatter plot of the number of rows per state against the percentage column.
# NOTE(review): this plots `df_per_col` (which still contains the rows with `missing`),
# not the cleaned `df_per_col_1` — confirm that is intended.
trace = PlotlyJS.scatter(df_per_col, x=:percentage_votes, y=:num_of_protests, mode="markers")
layout = Layout(xaxis_title="Procentowe poparcie partii rządzącej w wyborach", yaxis_title="Ilość protestów", title=attr(text="Ilość protestów w zależności od procentowego poparcia partii rządzącej", y=0.95,
x=0.5, xanchor="center", yanchor="top"))
PlotlyJS.plot(trace, layout)

In [None]:
# Plot the cross-correlation of `x` (percentages) and `y` (row counts) over the default lags.
PlotlyJS.plot(crosscor(x,y), Layout(yaxis_title="współczynnik korelacji", xaxis_title="lags", title_text ="Oscylacje współczynnika korelacji"))

Korelacja między procentowym udziałem pierwiastka w składzie chemicznym gleby a liczbą protestów

In [None]:
"""
    farmers_filter(data)

Return `true` when the note text `data` mentions agriculture, i.e. when its lowercased
form contains one of the stems "farm", "agricult", "plantat", "dairy" or "harvest".
A `missing` note counts as not matching.
"""
function farmers_filter(data::AbstractString)
    # The stems "cultiv" and "crop" were disabled in the original list and stay out.
    keywords = ("farm", "agricult", "plantat", "dairy", "harvest")
    text = lowercase(data)
    # Short-circuits on the first hit and allocates no intermediate Boolean vector.
    return any(keyword -> occursin(keyword, text), keywords)
end

# A missing note cannot mention anything; this lets the predicate run on columns that
# allow `missing`.
farmers_filter(::Missing) = false


"""
    refilter(df::DataFrame)

Return the rows of `df` whose `notes` pass `farmers_filter`, whose `year` is 2020 or
2021 and whose `event_type` is "Protests".
"""
function refilter(df::DataFrame)
    # Checks run in the same order as three successive filters would apply them.
    keep_row(notes, yr, event) =
        farmers_filter(notes) && (yr in [2020, 2021]) && event == "Protests"
    return filter([:notes, :year, :event_type] => keep_row, df)
end


In [None]:
# Agriculture-related protests of 2020-2021, counted per state.
df_agri = datas[in.(datas.year, Ref((2020, 2021))), :];
df_agri_filtered = refilter(df_agri)
df_agri_col = combine(groupby(df_agri_filtered, :admin1), nrow => :num_of_protests)

In [None]:
# These states have to be removed from the table: the soil-composition data set that
# was found has no element percentages for them.
# The stray entries "Arunachal" and "Pradesh" (an accidental split of
# "Arunachal Pradesh", which is already listed) have been dropped.
# NOTE(review): despite the name, the entries are state / union-territory names.
wrong_cities = ["Telangana", "Jammu and Kashmir", "Delhi", "Goa", "Arunachal Pradesh", "Jharkhand", "Chandigarh", "Chhattisgarh", "Puducherry", "Mizoram", "Manipur", "Meghalaya", "Uttarakhand", "Ladakh", "Nagaland", "Sikkim", "Lakshadweep", "Andaman and Nicobar Islands"];

In [None]:
# Keep only the states that are not listed in `wrong_cities`.
df_agri_col = df_agri_col[.!in.(df_agri_col.admin1, Ref(wrong_cities)), :];

In [None]:
# Sort by state name; the Nitrogen values below are attached by row position.
df_agri_col = sort(df_agri_col, :admin1)

In [None]:
# Hand-entered nitrogen values, attached to `df_agri_col` by row position; rows with
# `missing` are dropped before the two series are extracted.
# NOTE(review): the list has 17 entries and relies on `df_agri_col` having exactly that
# many rows in alphabetical state order — confirm against the sorted table.
Nitrogen = [1.55,1.76, missing, 1.71, 1.04, 2.34, 2.05, 1.66, 1.27, 1.40, 1.57, 1.67, missing, 1.34, 1.76, 1.25, 1.67];
df_agri_col[!,"Nitrogen"] = Nitrogen;
agri_col_cleansed = dropmissing(df_agri_col)
x1 = agri_col_cleansed[!,"Nitrogen"]
y1 = agri_col_cleansed[!,"num_of_protests"]

In [None]:
# Plot the cross-correlation of `x1` (nitrogen) and `y1` (row counts) over the default lags.
PlotlyJS.plot(crosscor(x1,y1), Layout(yaxis_title="współczynnik korelacji", xaxis_title="lags", title_text ="Oscylacje współczynnika korelacji"))

Stworzenie heatmapy liczby protestów w przeliczeniu na ludność

In [None]:
# Load the population table, keeping the name column and the 2011 population column.
population_data = CSV.read(
    "./data/populationCSV.csv",
    DataFrame;
    select = ["India / State/ Union Territory", "Population2011"],
)

In [None]:
# Per-state row counts, sorted by state name.
d = sort!(Count_of_protests(df), :state)

In [None]:
# View of rows 2 to 37 of the population table.
# NOTE(review): these 36 rows are attached to `d` by position below; presumably row 1
# is a country total and the rest are in the same alphabetical order as `d.state` —
# confirm.
population_state = @view population_data[2:37, :]

In [None]:
# Attach the 2011 population to each state row (by position).
d[!, "Population"] = population_state.Population2011

In [None]:
# Row count divided by population, per state.
d[!, "n/Population"] = d.n ./ d.Population

In [None]:
# Choropleth: each state shape from `topo_json` is joined (lookup on the shape `id`
# against `ID_admin1`) with the `n/Population` column of `d`, exposed as `admin`, and
# coloured by that value.
@vlplot(
	height = 700,
	width = 900 ) +
@vlplot(
	data = {
	    values = topo_json,
        format = {
		    type=:topojson,
            feature=:states
		}
    },
    mark = {
	    :geoshape,
		stroke=:white,
		strokeWidth=2
    },
    transform=[{
        lookup=:id,
        from={
            data=d,
            key=:ID_admin1,
            fields=["n/Population"]
		},
	as=:admin
    }],
    projection = {type=:equalEarth},
	color = {
	   :admin,
	   type=:quantitative,
       title = "Natężenie zdarzeń"
		},
    title = "Mapa przedstawiająca ilość konfliktów na osobę w danym rejonie"
) 

### Predykcja

In [None]:
"""
    timeseries_gen(df, region, years, event_type="any", fill=true)

Build a daily event-count series for one region.

# Arguments
- `df::DataFrame`: table with the columns `year`, `event_date` (strings in `"d U y"`
  format), `admin1` and `event_type`.
- `region::String`: value of `admin1` to keep.
- `years::Tuple{Int,Int}`: first and last year to include (both inclusive).
- `event_type::String`: value of `event_type` to keep; `"any"` keeps all of them.
- `fill::Bool`: when `true`, the series has one entry for every day from 1 January of
  the first year to 31 December of the last year, with `0` for days without events;
  when `false`, only days that have events appear.

# Returns
A `DataFrame` with columns `t` (days since 1 January of `years[1]`, ascending) and
`n` (number of events on that day).
"""
function timeseries_gen(df::DataFrame, region::String, years::Tuple{Int,Int}, event_type::String = "any", fill::Bool=true)
    first_year, last_year = years
    data = select(df, [:year, :event_date, :admin1, :event_type])
    data = subset(
        data,
        :year => y -> (y .>= first_year) .& (y .<= last_year),
        :admin1 => a -> a .== region,
    )
    if event_type != "any"
        data = subset(data, :event_type => e -> e .== event_type)
    end

    origin = Date(first_year, 1, 1)
    offsets = Dates.value.(Date.(data.event_date, dateformat"d U y") .- origin)
    per_day = countmap(offsets)

    if fill
        # Build the full day axis directly and look the counts up. This replaces
        # growing the vectors with `insert!` and catching BoundsError to find gaps.
        n_days = Dates.value(Date(last_year + 1, 1, 1) - origin)
        t = collect(0:(n_days - 1))
    else
        t = sort!(collect(keys(per_day)))
    end
    n = [get(per_day, day, 0) for day in t]
    return DataFrame(t = t, n = n)
end

In [None]:
# Daily "Protests" counts for Jammu and Kashmir, 2016-2018, with empty days filled in.
years = 2016,2018
# NOTE(review): `city` holds an `admin1` (state) name, not a city.
city = "Jammu and Kashmir"
# NOTE(review): `flog` is not read anywhere in the visible notebook — confirm it is
# still needed.
flog = false
ts = timeseries_gen(df, city, years, "Protests", true)

In [None]:
# Line plot of the daily counts over time.
ts |> @df StatsPlots.plot(:t,:n,dpi=600,title=city*" in $(years[1])-$(years[end])",xlabel="t [day]",ylabel="number of events",legend=false)

In [None]:
# Plot the raw series and overlay LOESS fits for four span values.
# NOTE(review): `d` was previously bound to the per-state counts table; it is
# rebound to a plot here.
d = ts |> @df StatsPlots.plot(:t,:n,xlabel="t [day]",ylabel="number of events",label="",dpi=600)
loenp = []
# Prediction grid with step 0.1, so every 10th point falls on an integer day
# (later cells take `np[1:10:end]` to line up with `ts.t`).
tp = range(extrema(ts.t)...; step = .1);
for i in [0.02,0.1,0.15,0.25]
	model = loess(ts.t, ts.n, span=i);
	np = Loess.predict(model, tp);
	push!(loenp,np)
	plot!(tp,np,palette=cgrad(:buda, 4, categorical = true),label="span=$(i)")
end
# Keep the third fit (span = 0.15) for the cells below.
np = loenp[3]
display(d)

In [None]:
# Residuals: observed daily counts minus the LOESS fit sampled at the integer days.
dif = ts.n .- np[1:10:end]
Plots.plot(ts.t, dif, dpi=600)

In [None]:
# Histogram of the residuals.
Plots.histogram(dif, dpi=600, xlabel="number of events",ylabel="number of occurrences", legend=false)

In [None]:
# The LOESS fit sampled at the integer days, as a table, and its plot.
timedata_fit = DataFrame(t = ts.t, n = np[1:10:end])
timedata_fit |> @df StatsPlots.plot(:t,:n,dpi=600)

In [None]:
# Fit a Normal distribution to the residuals by maximum likelihood and plot it.
fi = fit_mle(Normal, dif)
StatsPlots.plot(fi, dpi=600)

In [None]:
# Fit a LogNormal distribution to the residuals shifted by +15, then plot it shifted back.
# NOTE(review): the shift presumably makes all residuals positive; 15 is hard-coded and
# assumes no residual is below -15 — confirm for other regions or years.
fi2 = fit_mle(LogNormal, dif.+15)
StatsPlots.plot(fi2.-15, dpi=600)

In [None]:
# Compare the first 200 days of the series generated for `years .+ 2` with the LOESS
# fit plus random noise drawn from each fitted residual distribution.
# The draws are random, so the curves differ from run to run.
r = rand(fi,length(timedata_fit.t))
first(timeseries_gen(df, city, years.+2, "Protests", true), 200) |> @df StatsPlots.plot(:t,:n,dpi=600, label="Original data",title="Forecast for $(years[end]+1)",xlabel="t [day]",ylabel="number of events")
StatsPlots.plot!((r+np[1:10:end])[1:200], linestyle=:dot, label="Normal distribution")
r = rand(fi2,length(timedata_fit.t))
# The log-normal draws are shifted back by the same 15 that was added before fitting.
StatsPlots.plot!((r.-15+np[1:10:end])[1:200], linestyle=:dash, label="Log-normal distribution")

In [None]:
# Daily counts as Float64; this rebinds the `y` used in the vote-share cells above.
y = Vector{Float64}(ts.n);

In [None]:
# Build an ETS model for the series with `auto_ets`.
model = auto_ets(y)

In [None]:
# Fit the model; `fit!` is module-qualified, like the other calls in this notebook
# that name the package explicitly.
StateSpaceModels.fit!(model)

In [None]:
# Forecast 100 steps (days) ahead.
forec = StateSpaceModels.forecast(model, 100)

In [None]:
# Plot the fitted series together with the forecast.
# `plot` is qualified because both Plots and PlotlyJS are loaded with `using` and both
# export `plot`, so the bare name cannot be resolved; the rest of the notebook
# qualifies its plot calls for the same reason.
Plots.plot(model, forec,dpi=600,title="Auto ETS", xlabel="time [day]", ylabel="number of events",labels=["original data" "prediction"])