In [1]:
using StatsKit, Dates, Statistics, PlotlyJS, CSV;

In [35]:
# Read only the columns used by this notebook from the 2016-2022 export.
dataset = CSV.read(
    "../data/2016-2022.csv",
    DataFrame;
    select = [
        "year", "event_type", "sub_event_type", "longitude", "latitude", "fatalities",
        "location", "event_date", "admin1", "time_precision", "notes",
    ],
)

# Keep the rows whose `admin1` value is one of the ten states analysed below.
df = subset(
    dataset,
    :admin1 => states -> in.(
        states,
        Ref((
            "Tamil Nadu", "Punjab", "Kerala", "Assam", "Madhya Pradesh",
            "Rajasthan", "Maharashtra", "Uttar Pradesh", "Bihar", "Telangana",
        )),
    ),
)

Unnamed: 0_level_0,event_date,year,time_precision,event_type,sub_event_type
Unnamed: 0_level_1,String31,Int64,Int64,String31,String
1,18 March 2022,2022,1,Riots,Mob violence
2,18 March 2022,2022,1,Protests,Peaceful protest
3,18 March 2022,2022,1,Protests,Peaceful protest
4,18 March 2022,2022,1,Protests,Peaceful protest
5,18 March 2022,2022,1,Riots,Mob violence
6,18 March 2022,2022,1,Riots,Mob violence
7,18 March 2022,2022,2,Protests,Peaceful protest
8,18 March 2022,2022,2,Protests,Peaceful protest
9,18 March 2022,2022,2,Protests,Peaceful protest
10,18 March 2022,2022,2,Protests,Peaceful protest


In [36]:
"""
    date_conversion!(df::DataFrame, kind::String)

Parse `df.event_date` (text such as `"18 March 2022"`, date format `d U y`) and add one
derived calendar column to `df` in place.

# Arguments
- `df`: data frame with an `event_date` column.
- `kind`: selects the column that is added:
  - `"month"` adds `month`, filled with `Dates.monthname` of each date;
  - `"day_of_week"` adds `DayOfWeek`, filled with `Dates.dayname` of each date;
  - `"day"` adds `day`, filled with `Dates.day` of each date.

# Returns
The same `df`, now carrying the new column.

# Throws
- `ArgumentError` if `kind` is not one of the three values above.
"""
function date_conversion!(df::DataFrame, kind::String)
    dates = Dates.Date.(df.event_date, dateformat"d U y")
    if kind == "month"
        df[!, "month"] = Dates.monthname.(dates)
    elseif kind == "day_of_week"
        df[!, "DayOfWeek"] = Dates.dayname.(dates)
    elseif kind == "day"
        df[!, "day"] = Dates.day.(dates)
    else
        # ArgumentError accepts a single message; the offending value is interpolated
        # into it instead of being passed as a separate argument.
        throw(ArgumentError(
            "kind must be either \"month\", \"day_of_week\", or \"day\", got \"$kind\"",
        ))
    end
    return df
end


date_conversion! (generic function with 1 method)

In [37]:
"""
    filter_data(data::DataFrame, param::Symbol, value)

Return a new data frame holding the rows of `data` whose `param` column compares equal
(`.==`) to `value`. `data` itself is not modified.
"""
function filter_data(data::DataFrame, param::Symbol, value)
    is_match(column) = column .== value
    return subset(data, param => is_match)
end;

In [38]:
df_2019 = filter_data(df,:year, 2019);

In [39]:
date_conversion!(df_2019,"month");

In [40]:
date_conversion!(df_2019,"day");

In [41]:
"""
    srednia(data, month)

Average number of rows per distinct `day` value within one month.

Rows whose `month` column equals `month` are selected with `filter_data`, counted per
`day`, and the total count is divided by the number of `day` groups. A day without any
row forms no group, so it does not enter the denominator.
"""
function srednia(data, month)
    monthly = filter_data(data, :month, month)
    per_day = combine(groupby(monthly, :day), nrow)
    return sum(per_day.nrow) / nrow(per_day)
end

srednia (generic function with 1 method)

In [42]:
mean_April = srednia(df_2019,"April") #2019

21.733333333333334

In [43]:
mean_March = srednia(df_2019, "March")

22.967741935483872

In [44]:
mean_February = srednia(df_2019, "February")

40.25

In [45]:
mean_May = srednia(df_2019,"May") #2019

24.612903225806452

In [46]:
mean_June = srednia(df_2019,"June") #2019

26.366666666666667

In [47]:
mean_July = srednia(df_2019, "July")

28.258064516129032

Średnia z sześciu miesięcy (luty–lipiec 2019):

In [48]:
mean_basic = (mean_April+mean_May+mean_June+mean_February+mean_March+mean_July)/6

27.364784946236558

In [49]:
# Spread of the six monthly means around `mean_basic`.
# The variable name means "mean deviation", but `stdm` returns the standard deviation
# computed about the supplied mean, not a mean absolute deviation.
odchylenieprzecietne = stdm(
    [mean_April, mean_May, mean_June, mean_July, mean_March, mean_February],
    mean_basic,
)

6.729681166026331

In [50]:
# Bar chart of the April-July 2019 monthly means computed above.
# NOTE(review): February and March enter `mean_basic` but are not plotted here —
# confirm that leaving them out is intended.
means = [mean_April, mean_May, mean_June, mean_July]
months = ["April", "May", "June", "July"]

# `mode` is a scatter-trace attribute and has no meaning for a bar trace, so it is
# no longer passed.
plot(bar(x = months, y = means))

In [51]:
wariancja = odchylenieprzecietne^2

45.288608596369514

In [52]:
wspolczynnik_zmiennosci = odchylenieprzecietne/mean_basic

0.24592486947177178

| admin1 | ilość protestów | <-- na przestrzeni lat 2020–2021 (włącznie)


In [53]:
# Restrict the state subset to events recorded in 2020 or 2021.
df_agri = df[in.(df.year, Ref((2020, 2021))), :];
df_agri

Unnamed: 0_level_0,event_date,year,time_precision,event_type,sub_event_type,admin1
Unnamed: 0_level_1,String31,Int64,Int64,String31,String,String
1,31 December 2021,2021,1,Riots,Mob violence,Punjab
2,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
3,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
4,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
5,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
6,31 December 2021,2021,1,Protests,Peaceful protest,Madhya Pradesh
7,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
8,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
9,31 December 2021,2021,1,Protests,Peaceful protest,Punjab
10,31 December 2021,2021,1,Protests,Peaceful protest,Punjab


In [54]:
df_agri_col = groupby(df_agri, :admin1);

In [55]:
# One row per `admin1` group with its row count stored as `num_of_protests`.
df_agri_col = combine(df_agri_col, nrow => :num_of_protests)

Unnamed: 0_level_0,admin1,num_of_protests
Unnamed: 0_level_1,String,Int64
1,Uttar Pradesh,1605
2,Maharashtra,1371
3,Punjab,4220
4,Madhya Pradesh,511
5,Telangana,1215
6,Kerala,939
7,Tamil Nadu,2255
8,Assam,2599
9,Rajasthan,540
10,Bihar,802


In [56]:
# These states have to be removed from our df ->
# Telangana, Jammu and Kashmir, Delhi, Goa, Jharkhand, Chandigarh, Chhattisgarh, Puducherry, Arunachal Pradesh,
# Mizoram, Manipur, Meghalaya, Uttarakhand, Ladakh, Nagaland, Sikkim, Lakshadweep, Andaman and Nicobar Islands

In [57]:
# `admin1` values to exclude from the per-state protest counts. Despite the variable
# name these are states / territories, not cities. The stray entries "Arunachal" and
# "Pradesh" (a split of the already listed "Arunachal Pradesh") have been removed.
wrong_cities = ["Telangana", "Jammu and Kashmir", "Delhi", "Goa", "Arunachal Pradesh", "Jharkhand", "Chandigarh", "Chhattisgarh", "Puducherry", "Mizoram", "Manipur", "Meghalaya", "Uttarakhand", "Ladakh", "Nagaland", "Sikkim", "Lakshadweep", "Andaman and Nicobar Islands"];

In [58]:
# Drop every row whose `admin1` appears in `wrong_cities`.
# A single membership mask is used instead of reassigning the global inside a top-level
# `for` loop: that reassignment only works under the notebook's soft-scope rules and
# copies the data frame once per excluded name.
df_agri_col = df_agri_col[.!in.(df_agri_col.admin1, Ref(wrong_cities)), :];

In [59]:
df_agri_col

Unnamed: 0_level_0,admin1,num_of_protests
Unnamed: 0_level_1,String,Int64
1,Uttar Pradesh,1605
2,Maharashtra,1371
3,Punjab,4220
4,Madhya Pradesh,511
5,Kerala,939
6,Tamil Nadu,2255
7,Assam,2599
8,Rajasthan,540
9,Bihar,802


In [60]:
df_agri_col = sort(df_agri_col, :admin1)

Unnamed: 0_level_0,admin1,num_of_protests
Unnamed: 0_level_1,String,Int64
1,Assam,2599
2,Bihar,802
3,Kerala,939
4,Madhya Pradesh,511
5,Maharashtra,1371
6,Punjab,4220
7,Rajasthan,540
8,Tamil Nadu,2255
9,Uttar Pradesh,1605


In [61]:
Nitrogen = [1.55,1.76, missing, 1.71, 1.04, 2.34, 2.05, 1.66, 1.27, 1.40, 1.57, 1.67, missing, 1.34, 1.76, 1.25, 1.67];

In [62]:
# NOTE(review): `Nitrogen` holds 17 values while `df_agri_col` has 9 rows at this point,
# so this assignment fails with "New columns must have the same length as old columns".
# The values need to be matched to the remaining states (e.g. by state name) first —
# the intended state order of the vector is not visible here; TODO confirm.
df_agri_col[!,"Nitrogen"] = Nitrogen;

LoadError: ArgumentError: New columns must have the same length as old columns

In [63]:
Phosphorus = [1.57,2.13,missing, 1.64,1.20,1.93,1.90,2.35,1.84,1.16,1.54,1.93,missing, 2.11,1.69,1.32,2.05];

In [64]:
# NOTE(review): `Phosphorus` holds 17 values while `df_agri_col` has 9 rows at this point,
# so this assignment fails with "New columns must have the same length as old columns".
# The values need to be matched to the remaining states (e.g. by state name) first —
# the intended state order of the vector is not visible here; TODO confirm.
df_agri_col[!,"Phosphorus"] = Phosphorus;

LoadError: ArgumentError: New columns must have the same length as old columns

In [65]:
Potassium = [2.52,2.14,missing, 2.60,2.05,1.63,2.54,1.98,2.48, 2.66,1.93,2.40,missing, 2.40,1.70, 2.21,2.04];

In [66]:
# NOTE(review): `Potassium` holds 17 values while `df_agri_col` has 9 rows at this point,
# so this assignment fails with "New columns must have the same length as old columns".
# The values need to be matched to the remaining states (e.g. by state name) first —
# the intended state order of the vector is not visible here; TODO confirm.
df_agri_col[!,"Potassium"] = Potassium;

LoadError: ArgumentError: New columns must have the same length as old columns

In [67]:
df_agri_col

Unnamed: 0_level_0,admin1,num_of_protests
Unnamed: 0_level_1,String,Int64
1,Assam,2599
2,Bihar,802
3,Kerala,939
4,Madhya Pradesh,511
5,Maharashtra,1371
6,Punjab,4220
7,Rajasthan,540
8,Tamil Nadu,2255
9,Uttar Pradesh,1605


In [68]:
agri_col_cleansed = dropmissing(df_agri_col)

Unnamed: 0_level_0,admin1,num_of_protests
Unnamed: 0_level_1,String,Int64
1,Assam,2599
2,Bihar,802
3,Kerala,939
4,Madhya Pradesh,511
5,Maharashtra,1371
6,Punjab,4220
7,Rajasthan,540
8,Tamil Nadu,2255
9,Uttar Pradesh,1605


In [69]:
# Protest count against the `Nitrogen` column, drawn like the Phosphorus and Potassium
# cells; previously the trace was built but never passed to `plot`.
# NOTE(review): requires the `Nitrogen` column to exist in `agri_col_cleansed`.
trace = scatter(agri_col_cleansed, x=:Nitrogen, y=:num_of_protests, mode="markers")
plot(trace)

In [70]:
plot(scatter(agri_col_cleansed, x=:Phosphorus, y=:num_of_protests, mode="markers"))

In [71]:
plot(scatter(agri_col_cleansed, x=:Potassium, y=:num_of_protests, mode="markers"))

In [72]:
x1 = agri_col_cleansed[!, "Nitrogen"]

LoadError: ArgumentError: column name :Nitrogen not found in the data frame

Podejście, które ma na celu znalezienie korelacji między procentowym poparciem partii rządzącej w wyborach w 2019 r. a liczbą protestów

In [73]:
# All 2019 events from the full data set (not restricted to the ten selected states).
# The year test was previously OR-ed with itself three times; one comparison suffices.
df_percent = dataset[dataset.year .== 2019, :];
df_percent

Unnamed: 0_level_0,event_date,year,time_precision,event_type,sub_event_type
Unnamed: 0_level_1,String31,Int64,Int64,String31,String
1,31 December 2019,2019,1,Protests,Peaceful protest
2,31 December 2019,2019,1,Protests,Peaceful protest
3,31 December 2019,2019,1,Protests,Peaceful protest
4,31 December 2019,2019,1,Protests,Peaceful protest
5,31 December 2019,2019,1,Protests,Peaceful protest
6,31 December 2019,2019,1,Protests,Peaceful protest
7,31 December 2019,2019,1,Protests,Peaceful protest
8,31 December 2019,2019,1,Protests,Peaceful protest
9,31 December 2019,2019,1,Protests,Peaceful protest
10,31 December 2019,2019,1,Protests,Peaceful protest


In [74]:
# Count the 2019 rows per `admin1` value, then show the resulting `admin1` order.
df_per_col = combine(groupby(df_percent, :admin1), nrow => :num_of_protests)
df_per_col.admin1

35-element PooledArrays.PooledVector{String, UInt32, Vector{UInt32}}:
 "Uttar Pradesh"
 "Maharashtra"
 "Punjab"
 "Madhya Pradesh"
 "Telangana"
 "Kerala"
 "Karnataka"
 "Tamil Nadu"
 "West Bengal"
 "Jammu and Kashmir"
 "Andhra Pradesh"
 "Delhi"
 "Assam"
 ⋮
 "Tripura"
 "Arunachal Pradesh"
 "Mizoram"
 "Haryana"
 "Manipur"
 "Meghalaya"
 "Uttarakhand"
 "Ladakh"
 "Nagaland"
 "Sikkim"
 "Andaman and Nicobar Islands"
 "Dadra and Nagar Haveli and Daman and Diu"

In [75]:
# One hand-entered value per row of `df_per_col` (35 entries, `missing` where no figure
# is given); presumably the ruling party's vote share in percent — TODO confirm source.
# NOTE(review): the values are matched to `admin1` purely by position, so they silently
# misalign if the row order of `df_per_col` ever changes; keying them by state name and
# joining would be safer.
percentage_votes = [45, 50, 54, 68, 2, 28, 25, 37, 55, 2, 55, 30, 52, 50, 70, 20, 50, 38, 65, 45, 58, 54, 46, 57, 5, 52, 33, 20, 64, missing, 54, 4, missing, missing, missing]

35-element Vector{Union{Missing, Int64}}:
 45
 50
 54
 68
  2
 28
 25
 37
 55
  2
 55
 30
 52
  ⋮
 57
  5
 52
 33
 20
 64
   missing
 54
  4
   missing
   missing
   missing

In [76]:
df_per_col[!, "percentage_votes"] = percentage_votes

35-element Vector{Union{Missing, Int64}}:
 45
 50
 54
 68
  2
 28
 25
 37
 55
  2
 55
 30
 52
  ⋮
 57
  5
 52
 33
 20
 64
   missing
 54
  4
   missing
   missing
   missing

In [77]:
df_per_col[!, :num_of_protests]

35-element Vector{Int64}:
 1349
  682
 2324
  424
 1121
  732
  858
 1604
 1730
 3541
 1078
  622
 1983
    ⋮
  528
  207
   44
  547
 1068
  184
  185
    7
   66
   38
   73
    1

In [78]:
# Rows with a missing value are dropped so that both vectors are complete and aligned.
df_per_col_1 = dropmissing(df_per_col)
x = df_per_col_1.percentage_votes
y = df_per_col_1.num_of_protests

31-element Vector{Int64}:
 1349
  682
 2324
  424
 1121
  732
  858
 1604
 1730
 3541
 1078
  622
 1983
    ⋮
  299
   91
  228
  467
  528
  207
   44
  547
 1068
  184
    7
   66

In [79]:
# Scatter of protest count against vote share, with a centred title near the top.
trace = scatter(
    df_per_col,
    x = :percentage_votes,
    y = :num_of_protests,
    mode = "markers",
)
layout = Layout(
    xaxis_title = "Procentowe poparcie partii rządzącej w wyborach",
    yaxis_title = "Ilość protestów",
    title = attr(
        text = "Ilość protestów w zależności od procentowego poparcia partii rządzącej",
        y = 0.95,
        x = 0.5,
        xanchor = "center",
        yanchor = "top",
    ),
)
plot(trace, layout)

In [81]:
# Cross-correlation of `x` and `y`, drawn against the actual lag values. Previously the
# coefficients were plotted against their array position (1, 2, ...) although the axis
# is titled "lags".
# The lag window is spelled out so it can be reused for the x axis; it follows the
# default range documented for StatsBase `crosscor` — TODO confirm against the installed
# version.
# NOTE(review): `x` and `y` are ordered by state, not by time, so a lagged correlation
# has no obvious interpretation here — confirm this plot is wanted.
let max_lag = min(length(x) - 1, round(Int, 10 * log10(length(x))))
    lags = collect(-max_lag:max_lag)
    plot(
        scatter(x = lags, y = crosscor(x, y, lags)),
        Layout(yaxis_title="współczynnik korelacji", xaxis_title="lags", title_text ="Oscylacje współczynnika korelacji"),
    )
end

In [None]:
cor(x,y)

-0.22295002308844136

In [None]:
corspearman(x,y)

-0.16271337315387754