# 4.4 Reshaping
- <http://modern-rstats.eu/descriptive-statistics-and-data-manipulation.html#reshaping-and-sprucing-up-data-with-tidyr>

In [26]:
library(tidyr)
library(tidyverse)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.1     [32m✔[39m [34mpurrr    [39m 1.0.1
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mggplot2  [39m 3.4.2     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtibble   [39m 3.2.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [2]:
survey_data <- tribble(
  ~id, ~variable, ~value,
  1, "var1", 1,
  1, "var2", 0.2,
  NA, "var3", 0.3,
  2, "var1", 1.4,
  2, "var2", 1.9,
  2, "var3", 4.1,
  3, "var1", 0.1,
  3, "var2", 2.8,
  3, "var3", 8.9,
  4, "var1", 1.7,
  NA, "var2", 1.9,
  4, "var3", 7.6
)

head(survey_data)

id,variable,value
<dbl>,<chr>,<dbl>
1.0,var1,1.0
1.0,var2,0.2
,var3,0.3
2.0,var1,1.4
2.0,var2,1.9
2.0,var3,4.1


In [3]:
survey_data %>% 
  pivot_wider(id_cols = id,
              names_from = variable,
              values_from = value)

id,var1,var2,var3
<dbl>,<dbl>,<dbl>,<dbl>
1.0,1.0,0.2,
,,1.9,0.3
2.0,1.4,1.9,4.1
3.0,0.1,2.8,8.9
4.0,1.7,,7.6


In [4]:
unemp_lux_data <- rio::import(
      "https://raw.githubusercontent.com/b-rodrigues/modern_R/master/datasets/unemployment/all/unemployment_lux_all.csv"
                       )

In [6]:
str(unemp_lux_data)

'data.frame':	1770 obs. of  8 variables:
 $ division                    : chr  "Beaufort" "Beaufort" "Beaufort" "Beaufort" ...
 $ year                        : int  2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 ...
 $ active_population           : num  688 742 773 828 866 ...
 $ of_which_non_wage_earners   : num  85 85 85 80 96 87 90 85 84 86 ...
 $ of_which_wage_earners       : num  568 631 648 706 719 746 778 820 829 898 ...
 $ total_employed_population   : num  653 716 733 786 815 833 868 905 913 984 ...
 $ unemployed                  : num  35 26 40 42 51 60 39 69 85 85 ...
 $ unemployment_rate_in_percent: num  5.09 3.5 5.17 5.07 5.89 6.72 4.3 7.08 8.52 7.95 ...


unemp_lux_data$division%>%unique%>%sort

In [43]:
(
    unemp_lux_data 
    %>% filter( 
        year %in% seq(2013, 2017), 
        str_detect(division, ".*ange$"), 
        !str_detect(division, ".*Canton.*")
    ) 
    %>% select(division, year, unemployment_rate_in_percent)    
    %>% rowid_to_column()
    %>% pivot_wider(
            names_from = c(division, year),
            values_from = unemployment_rate_in_percent)    
)%>% head

rowid,Bertrange_2013,Bertrange_2014,Bertrange_2015,Differdange_2013,Differdange_2014,Differdange_2015,Dudelange_2013,Dudelange_2014,Dudelange_2015,⋯,Useldange_2015,Walferdange_2013,Walferdange_2014,Walferdange_2015,Wincrange_2013,Wincrange_2014,Wincrange_2015,Wormeldange_2013,Wormeldange_2014,Wormeldange_2015
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,5.69,,,,,,,,,⋯,,,,,,,,,,
2,,5.65,,,,,,,,⋯,,,,,,,,,,
3,,,5.35,,,,,,,⋯,,,,,,,,,,
4,,,,13.22,,,,,,⋯,,,,,,,,,,
5,,,,,12.61,,,,,⋯,,,,,,,,,,
6,,,,,,11.43,,,,⋯,,,,,,,,,,


In [45]:
(
    unemp_lux_data 
    %>% tidyr::expand(division,
         year = c(year, 2016, 2017),
         .value = "unemployment_rate_in_percent") 
    %>% unite(".name", division, year, remove = FALSE)    
)%>%str

tibble [2,006 × 4] (S3: tbl_df/tbl/data.frame)
 $ .name   : chr [1:2006] "Beaufort_2001" "Beaufort_2002" "Beaufort_2003" "Beaufort_2004" ...
 $ division: chr [1:2006] "Beaufort" "Beaufort" "Beaufort" "Beaufort" ...
 $ year    : num [1:2006] 2001 2002 2003 2004 2005 ...
 $ .value  : chr [1:2006] "unemployment_rate_in_percent" "unemployment_rate_in_percent" "unemployment_rate_in_percent" "unemployment_rate_in_percent" ...


In [46]:
?tidyr::expand

0,1
expand {tidyr},R Documentation

0,1
data,A data frame.
...,"<data-masking> Specification of columns to expand or complete. Columns can be atomic vectors or lists.  To find all unique combinations of x, y and z, including those not present in the data, supply each variable as a separate argument: expand(df, x, y, z) or complete(df, x, y, z).  To find only the combinations that occur in the data, use nesting: expand(df, nesting(x, y, z)).  You can combine the two forms. For example, expand(df, nesting(school_id, student_id), date) would produce a row for each present school-student combination for all possible dates. When used with factors, expand() and complete() use the full set of levels, not just those that appear in the data. If you want to use only the values seen in the data, use forcats::fct_drop(). When used with continuous variables, you may need to fill in values that do not appear in the data: to do so use expressions like year = 2010:2020 or year = full_seq(year,1)."
.name_repair,"Treatment of problematic column names:  ""minimal"": No name repair or checks, beyond basic existence,  ""unique"": Make sure names are unique and not empty,  ""check_unique"": (default value), no name repair, but check they are unique,  ""universal"": Make the names unique and syntactic  a function: apply custom name repair (e.g., .name_repair = make.names for names in the style of base R).  A purrr-style anonymous function, see rlang::as_function() This argument is passed on as repair to vctrs::vec_as_names(). See there for more details on these terms and the strategies used to enforce them."


In [48]:
library(quantmod,quietly = TRUE)

In [49]:


# Set the symbol for Euro FX futures
symbol <- "6E"

# Set the start and end dates for the data
start_date <- as.Date("2020-01-01")
end_date <- as.Date("2020-12-31")

# Download the CFTC data using the getSymbols.cftc() function
cftc_data <- getSymbols.cftc(symbol, from = start_date, to = end_date)

# View the data
head(cftc_data)


ERROR: Error in getSymbols.cftc(symbol, from = start_date, to = end_date): could not find function "getSymbols.cftc"
