# 一、导入导出数据

## 1. 手动输入数据

### 输入向量

In [4]:
# Create a numeric vector holding five values
numeric_values <- c(1, 3, 5, 8, 9)


In [6]:
# Display the class of the vector
class(numeric_values)

In [8]:
# Display the numeric vector
numeric_values

In [9]:
# Return the fourth element of the vector (R indexing starts at 1)
numeric_values[4]

In [13]:
# Create a character vector
char_values <- c("Bob", "Mike", "Tony", "Andy")

# Display the class of the vector
class(char_values)

### 输入数据框

In [1]:
# Build a data frame with one row per observation:
# the team label plus its points and assists
df <- data.frame(
  team = rep(c("A", "B", "C"), times = c(2, 2, 1)),
  points = c(12, 15, 17, 24, 27),
  assists = c(4, 7, 7, 8, 12)
)

# Display the contents of the data frame
df

team,points,assists
A,12,4
A,15,7
B,17,7
B,24,8
C,27,12


In [3]:
# Display the class of the data frame
class(df)

# Display the value in row 4, column 3
df[4, 3]


### 输入矩阵

In [4]:
# Create the two numeric vectors that become the matrix columns
points <- c(12, 15, 17, 24, 27)
assists <- c(4, 7, 7, 8, 12)

# Column-bind the two vectors: each vector becomes one column,
# so the result is a matrix with 5 rows and 2 columns
mat <- cbind(points, assists)

# Display the matrix
mat


# Display the class of mat
class(mat)

# Return the value in the fourth row and second column
mat[4, 2]

assists 


points,assists
12,4
15,7
17,7
24,8
27,12


## 2. 保存和加载RDA文件

In [5]:
# Save the df object to an RDA file named my_data.rda
save(df, file='my_data.rda')

In [None]:
# Load the objects stored in my_data.rda back into the current environment
load(file='my_data.rda')

### 实例

In [6]:
# Fix the random seed so the example is reproducible
set.seed(0)

# Draw all 300 standard-normal values in one call; the matrix is filled
# column by column, so x gets the first 100 draws, y the next 100 and
# z the last 100
df <- setNames(
  as.data.frame(matrix(rnorm(300), ncol = 3)),
  c("x", "y", "z")
)

# Show the first six rows of the data frame
head(df)

x,y,z
1.2629543,0.7818592,-1.0457177
-0.3262334,-0.7767766,-0.8962113
1.3297993,-0.6159899,1.2693872
1.2724293,0.0465803,0.5938409
0.4146414,-1.1303858,0.7756343
-1.53995,0.5767188,1.5573704


In [7]:
# Display the current working directory
getwd()

In [None]:
# Remove the data frame df from the current R environment
rm(df)

## 3.导入csv文件

假设文件路径是：  C:\Users\Bob\Desktop\data.csv

1. 使用base R中的read.csv (比较慢, 小数据集可以)

In [None]:
# Read the CSV file with base R, treating the first line as the header
data1 <- read.csv("C:\\Users\\Bob\\Desktop\\data.csv", header=TRUE, stringsAsFactors=FALSE)  # stringsAsFactors=FALSE is the default since R 4.0, so it can be omitted on newer versions
# Inspect the structure of the imported data
str(data1)

In [None]:
# Use read_csv from the readr package (stated here as 2-3x faster than read.csv)

library(readr)

# Read the CSV file and inspect the structure of the result
data2 <- read_csv("C:\\Users\\Bob\\Desktop\\data.csv")
str(data2)

In [None]:
# Use fread from the data.table package (stated here as 2-3x faster than read_csv)

library(data.table)

# Read the CSV file and inspect the structure of the result
data3 <- fread("C:\\Users\\Bob\\Desktop\\data.csv")
str(data3)


## 4.从URL读取CSV  

In [None]:
# Method 1: read a CSV file from a URL with base R

data <- read.csv('https://website.com/data.csv')

# Show the first rows of the imported data
head(data)

In [None]:
# Method 2: read a CSV file from a URL with the data.table package

library(data.table)

# Import the data and show the first rows
data <- fread('https://website.com/data.csv')
head(data)

In [None]:
# Method 3: read a CSV file from a URL with the readr package

library(readr)

# Import the data and show the first rows
data <- read_csv('https://website.com/data.csv')
head(data)

## 5. 合并多个 CSV 文件

In [None]:
# Load and merge every CSV file stored in one folder into a single data frame.
# dplyr provides %>% and bind_rows, readr provides read_csv.
library(dplyr)
library(readr)

# full.names = TRUE returns complete paths, so the files can be read even
# when the working directory is not the folder itself; the pattern keeps
# only names ending in ".csv"
df <- list.files(path = 'C:/my/path/to/files',
                 pattern = '\\.csv$',
                 full.names = TRUE) %>%
  lapply(read_csv) %>%
  bind_rows()

#### Step 1: Create & Export Multiple Data Frames
First, we’ll use the following code to create and export three data frames to CSV files:

In [10]:

# Build the three example data frames (same columns, different row counts)
df1 <- data.frame(
  points = c(4, 5, 5, 6, 8, 9),
  assists = c(3, 2, 4, 4, 6, 3)
)

df2 <- data.frame(
  points = c(2, 10, 14, 15),
  assists = c(3, 2, 9, 3)
)

df3 <- data.frame(
  points = c(6, 8, 9),
  assists = c(10, 6, 4)
)

# Export each data frame to its own CSV file in the working directory,
# pairing every frame with its target path; row names are not written
invisible(Map(
  function(frame, path) write.csv(frame, path, row.names = FALSE),
  list(df1, df2, df3),
  c('./df1.csv', './df2.csv', './df3.csv')
))

#### Step 2: Import & Merge Multiple CSV Files
Next, we’ll use the following code to import and merge all three CSV files into one data frame in R:

In [44]:
library(dplyr)
library(readr)

# Load and merge every CSV file found in the working directory.
# list.files() treats `pattern` as a regular expression, not a glob, so
# "\\.csv$" is used to match names that end in ".csv"
df <- list.files(pattern = "\\.csv$") %>%
  lapply(read_csv) %>%
  bind_rows()

# View the resulting data frame
df

Parsed with column specification:
cols(
  points = col_double(),
  assists = col_double()
)


points,assists
6,10
8,6
9,4


#### Download Files from the Internet Using R


In [None]:

# Step 1: define the URL of the file to download
url <- "https://data.cityofnewyork.us/api/views/brsj-szf5/rows.csv?accessType=DOWNLOAD"

# Step 2: define the destination for the file.
# download.file() needs the full path of the file to create, not just a
# folder, so the file name is part of the destination
destfile <- "C:/Users/Bob/Downloads/rows.csv"

# Step 3: download the file and save it at the specified destination
download.file(url, destfile)

## 6.readLines()

#### 例子1

In [None]:
# Read every line of some_data.txt; each line becomes one element of a character vector
readLines("C:/Users/Bob/Documents/some_data.txt")

[1] "The first line of the file"  "The second line of the file"
[3] "The third line of the file"  "The fourth line of the file"
[5] "The fifth line of the file"  "The sixth line of the file"  

#### 例子2

In [None]:
# Read every line of some_data.txt into a character vector
my_data <- readLines("C:/Users/Bob/Documents/some_data.txt")

# Build a data frame with one row per line, stored in the `values` column
df <- data.frame(values = my_data)

# View the data frame
df

                       values
1  The first line of the file
2 The second line of the file
3  The third line of the file
4 The fourth line of the file
5  The fifth line of the file
6  The sixth line of the file

In [None]:
# Read only the first 4 lines of some_data.txt (n limits the number of lines read)
my_data <- readLines("C:/Users/Bob/Documents/some_data.txt", n=4)

# Display the second line only
my_data[2]

[1] "The second line of the file"

## 7.读取Zip Files

In [None]:
library(readr)

# Import data1.csv located within my_data.zip.
# unzip() returns the path of the extracted file; extracting into the
# session's temporary directory avoids leaving a stray copy of data1.csv
# in the working directory
df <- read_csv(unzip("my_data.zip", files = "data1.csv", exdir = tempdir()))

## 8.查看文件list.files() 

In [None]:
# Display the names of all files in the my_data_files folder
list.files('C:/Users/bob/Documents/my_data_files')

[1] "df1.csv"       "df2.csv"       "df3.csv"   "more_data.txt" "some_data.txt"

In [None]:
# Display the total number of files in the my_data_files folder
length(list.files('C:/Users/bob/Documents/my_data_files'))

[1] 5

In [None]:
# Display the first three file names returned for the my_data_files folder
list.files('C:/Users/bob/Documents/my_data_files')[1:3]

[1] "df1.csv"       "df2.csv"       "df3.csv"

In [None]:
# Display all files with a .csv extension in the my_data_files folder.
# `pattern` is a regular expression: "\\.csv$" matches only names that end
# in ".csv", whereas a bare "csv" would also match it anywhere in the name
list.files('C:/Users/bob/Documents/my_data_files', pattern = '\\.csv$')

[1] "df1.csv" "df2.csv" "df3.csv"

In [None]:
# Display all files whose name contains 'data' (pattern is matched as a regular expression)
list.files('C:/Users/bob/Documents/my_data_files', pattern='data')

[1] "more_data.txt" "some_data.txt"

## 9.导入Excel 文件

In [None]:
# Install readxl only when it is not already available, then load it;
# this avoids re-downloading the package every time the cell is run
if (!requireNamespace('readxl', quietly = TRUE)) {
  install.packages('readxl')
}
library(readxl)

# Import the Excel file into R
data <- read_excel('C:\\Users\\Bob\\Desktop\\data.xlsx')

## 10.导入 SPSS文件

In [None]:
# Load haven and import the SPSS file healthdata.sav
library(haven)
data <- read_sav('C:/Users/bob/Downloads/healthdata.sav')

## 11.导出数据框

In [None]:
# Use write.csv from base R; row.names=FALSE leaves the row names out of the file
write.csv(df, "C:\\Users\\Bob\\Desktop\\data.csv", row.names=FALSE)

In [None]:
# Use write_csv from the readr package
library(readr)

write_csv(df, "C:\\Users\\Bob\\Desktop\\data.csv")

In [None]:
# Use fwrite from the data.table package (described here as a faster way)
library(data.table)

fwrite(df, "C:\\Users\\Bob\\Desktop\\data.csv")

## 12.数据框导出excel

In [None]:
# Install writexl only when it is not already available, then load it;
# this avoids re-downloading the package every time the cell is run
if (!requireNamespace('writexl', quietly = TRUE)) {
  install.packages('writexl')
}
library(writexl)

# Export the data frame to an Excel file
write_xlsx(df, 'C:\\Users\\Bob\\Desktop\\data.xlsx')

## 13.导出excel多个数据表

In [None]:
# Define three data frames that share the playerID column;
# each one adds a different attribute for the same four players
df1 <- data.frame(playerID = c(1, 2, 3, 4),
                  team = c('A', 'B', 'B', 'C'))

df2 <- data.frame(playerID = c(1, 2, 3, 4),
                  rebounds = c(7, 8, 8, 14))

df3 <- data.frame(playerID = c(1, 2, 3, 4),
                  points = c(19, 22, 25, 29))

In [None]:
library(openxlsx)

# Named list pairing each sheet name with the data frame assigned to it
dataset_names <- list('Sheet1' = df1, 'Sheet2' = df2, 'Sheet3' = df3)

# Export the list to mydata.xlsx, intended as one sheet per data frame
openxlsx::write.xlsx(dataset_names, file = 'mydata.xlsx') 

## 14.导出多个图形到pdf

#### 多图保存到一页

In [None]:
# Path of the PDF file that will receive the plots
destination <- 'C:\\Users\\Bob\\Documents\\my_plots.pdf'

# Open the PDF graphics device
pdf(file = destination)

# Lay the plots out on a 2x2 grid so that all four share one page
par(mfrow = c(2, 2))

# Draw four scatter plots; plot i uses i random points on each axis
for (i in seq_len(4)) {
  x <- rnorm(i)
  y <- rnorm(i)
  plot(x, y)
}

# Close the PDF device
dev.off()

#### 多图保存到不同页

In [None]:
# Path of the PDF file that will receive the plots
destination <- 'C:\\Users\\Bob\\Documents\\my_plots.pdf'

# Open the PDF graphics device
pdf(file = destination)

# Draw four scatter plots; with no grid layout set, each plot goes on
# its own page. Plot i uses i random points on each axis
for (i in seq_len(4)) {
  x <- rnorm(i)
  y <- rnorm(i)
  plot(x, y)
}

# Close the PDF device
dev.off()

## 15.使用colClasses快速导入数据

In [None]:
# Import the CSV file, declaring the class of each of the three columns up front
df <- read.csv('my_data.csv',
               colClasses=c('character', 'numeric', 'numeric'))

# View the class of each column in the data frame
str(df)

'data.frame':	14 obs. of  3 variables:
 $ team    : chr  "Mavs" "Spurs" "Hornets" "Rockets" ...
 $ points  : num  91 99 104 103 105 88 89 93 96 99 ...
 $ rebounds: num  33 23 26 25 25 26 29 30 34 23 ...

In [None]:
# Import the CSV file with a single column class; it is applied to
# every column, so all columns are read as character
df <- read.csv(file = 'my_data.csv', colClasses = 'character')

# Inspect the class of each column in the data frame
str(df)

'data.frame':	14 obs. of  3 variables:
 $ team    : chr  "Mavs" "Spurs" "Hornets" "Rockets" ...
 $ points  : chr  "91" "99" "104" "103" ...
 $ rebounds: chr  "33" "23" "26" "25" ...

- character: “hey”, “there”, “world”
- complex: as.complex(-1), 4i
- numeric: 4, 12, 158
- integer: as.integer(20), 3L
- logical: TRUE, FALSE

## 16.使用 read.delim() 函数将带分隔符的文本文件读入 R

In [None]:
# General form for reading a tab-delimited text file.
# `file` is a placeholder: replace it with the path of the file to read.
# header = TRUE treats the first line as column names; sep = "\t" sets the
# tab character as the field separator (plain ASCII quotes are required)
read.delim(file, header = TRUE, sep = "\t")


## 17.导出数据框

In [None]:
# Export the data frame to data.txt on the Desktop with write.table
write.table(df, file='C:\\Users\\bob\\Desktop\\data.txt')

## 18.导出Excel file


In [None]:
# Export df to my_data.xlsx, naming the sheet 'basketball_data' and omitting row names
# NOTE(review): no library() call is visible in this cell; write.xlsx is presumably provided by openxlsx or xlsx -- confirm which package is loaded
write.xlsx(df, 'my_data.xlsx', sheetName = 'basketball_data', row.names=FALSE)

## 19.工作目录操作 

In [None]:
# Display the current working directory
getwd()

[1] "C:/Users/Bob/Desktop"

In [None]:
# Set the working directory to the Documents folder
setwd('C:/Users/Bob/Documents')

In [None]:
# Check whether 'analysis3.R' is among the file names listed for the working directory
'analysis3.R' %in% list.files()
[1] TRUE