-
Notifications
You must be signed in to change notification settings - Fork 1
/
Workshop5_S20_Answers.Rmd
131 lines (96 loc) · 2.96 KB
/
Workshop5_S20_Answers.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
---
title: "Workshop5_S20"
output:
  word_document: default
  html_document: default
---
```{r setup, include=FALSE}
# Global chunk defaults for this document: show each chunk's source in the
# knitted output (echo), but do not execute chunks while knitting (eval).
knitr::opts_chunk$set(
  echo = TRUE,
  eval = FALSE
)
```
```{r}
# Install dplyr only when it is not already available, so re-running this
# chunk does not download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

# Attach dplyr so its verbs and the %>% pipe can be used unqualified below.
library(dplyr)
```
## Read in the CSV
```{r}
# Import the workshop data; cells holding the literal string "#N/A" are read
# as missing values (NA).
bechdel_data_original <- read.csv(
  file = "bechdel_test_movies_w5.csv",
  na.strings = "#N/A"
)

# Work on a copy so the freshly imported data stays available unchanged.
bechdel_data <- bechdel_data_original
```
## Cross-tabulate budget category and Bechdel test, before and after ordering budget category
```{r}
# First pass: budget category (rows) against Bechdel test result (columns),
# using the budget category column exactly as it was read in.
x <- table(bechdel_data[["budgetcategory"]], bechdel_data[["bechdeltest"]])
x

# Recode budget category as an ordered factor so that its levels sort as
# low < medium < high. Values outside these three levels become NA.
bechdel_data[["budgetcategory"]] <- ordered(
  bechdel_data[["budgetcategory"]],
  levels = c("low", "medium", "high")
)

# Second pass: the same table, with rows now following the level order above.
x <- table(bechdel_data[["budgetcategory"]], bechdel_data[["bechdeltest"]])
x
```
## Summary statistics by group
```{r}
# Mean budget over all movies, ignoring missing values.
mean(bechdel_data$budget, na.rm = TRUE)

# Mean budget within subsets picked by logical indexing. na.rm = TRUE is
# needed here as well: a missing budget, or a missing value in the column
# being compared (which yields an NA index), would otherwise make the whole
# mean NA.
# Movies that failed the Bechdel test:
mean(bechdel_data$budget[bechdel_data$bechdeltest == "FAIL"], na.rm = TRUE)
# Movies that passed the Bechdel test:
mean(bechdel_data$budget[bechdel_data$bechdeltest == "PASS"], na.rm = TRUE)
# Movies whose Year is 2012:
mean(bechdel_data$budget[bechdel_data$Year == 2012], na.rm = TRUE)

# Peek at the first rows of the data.
head(bechdel_data)
```
## Summary statistics by group, another way
```{r}
# Apply mean() to `intgross` separately for each value of `bechdeltest`;
# na.rm = TRUE is forwarded to mean() so missing values are skipped.
by(
  data = bechdel_data$intgross,
  INDICES = bechdel_data$bechdeltest,
  FUN = mean,
  na.rm = TRUE
)
```
## Introducing dplyr
```{r}
# A few dplyr verbs in action.

# Sort rows from largest to smallest budget, written as a plain function call.
arrange(bechdel_data, desc(budget))

# The same sort written with the pipe: the left-hand side becomes the first
# argument of the call on the right.
bechdel_data %>%
  arrange(desc(budget))

# Keep only the rows in the "low" budget category.
bechdel_data %>%
  filter(budgetcategory == "low")

# Two explicit steps with a named intermediate: sort by budget, then keep the
# movies that pass the test.
arranged_data <- arrange(bechdel_data, desc(budget))
highest_pass_movies <- filter(arranged_data, bechdeltest == "PASS")

# The same two steps chained into a single pipeline.
highest_pass_movies <- bechdel_data %>%
  arrange(desc(budget)) %>%
  filter(bechdeltest == "PASS")
highest_pass_movies

# Narrow the passing movies down to those with Year after 2000.
highest_pass_movies %>%
  filter(Year > 2000)

# Keep the title plus every column whose name ends in "gross".
bechdel_data %>%
  select(movietitle, ends_with("gross"))
```
## Dplyr Groupings
```{r}
# Step by step: group_by() leaves the rows alone and only records the
# grouping; compare the classes of the grouped and original objects.
summary1 <- group_by(bechdel_data, budgetcategory, bechdeltest)
summary1
class(summary1)
class(bechdel_data)

# Collapse each budgetcategory x bechdeltest group to one row holding its
# number of movies.
summary1 <- summarize(summary1, count = n())

# The same steps as one pipeline, adding the group mean of intgross.
# na.rm = TRUE keeps a single missing intgross from turning a whole group's
# average into NA.
budgetcat_bytest <- bechdel_data %>%
  group_by(budgetcategory, bechdeltest) %>%
  summarize(avgintgross = mean(intgross, na.rm = TRUE), count = n())
budgetcat_bytest
```
## Now let's do counts for year by budget category
```{r}
# Number of movies for every combination of Year and budget category.
# (The exercise placeholders "" are filled in here.)
year_bybudgetcat <- bechdel_data %>%
  group_by(Year, budgetcategory) %>%
  summarize(count = n())
year_bybudgetcat
```
## Average profit by budget category and Bechdel test
```{r}
# Average profit for every combination of budget category and test result.
# NOTE(review): no profit column is visible in this file, so profit is
# computed as intgross - budget; confirm this matches the intended definition.
# Rows where either value is missing are skipped via na.rm = TRUE.
budgetcat_bytest_2 <- bechdel_data %>%
  group_by(budgetcategory, bechdeltest) %>%
  summarize(avgprofit = mean(intgross - budget, na.rm = TRUE))
budgetcat_bytest_2
```
## Combine multiple stats
```{r}
# Several statistics at once for every combination of budget category and
# test result: number of movies, mean intgross, and mean profit.
# NOTE(review): profit is taken as intgross - budget; confirm the definition.
# Missing values are skipped in both means.
budgetcat_bytest <- bechdel_data %>%
  group_by(budgetcategory, bechdeltest) %>%
  summarize(
    count = n(),
    avgintgross = mean(intgross, na.rm = TRUE),
    avgprofit = mean(intgross - budget, na.rm = TRUE)
  )
budgetcat_bytest
```
```{r}
# Save the grouped summary to disk without the row-number column.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
write.csv(
  budgetcat_bytest,
  "BudgetCat_by_TestResults.csv",
  row.names = FALSE
)
```