-
Notifications
You must be signed in to change notification settings - Fork 1
/
datahandling.go
140 lines (116 loc) · 3.7 KB
/
datahandling.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package main
import (
"context"
"fmt"
"google.golang.org/api/iterator"
"io/ioutil"
"log"
"net/mail"
"regexp"
"time"
"cloud.google.com/go/storage"
conf "github.com/eth-library/dataset-dj/configuration"
)
// simple "Database" for the metaArchives
// var archives map[string]metaArchive = make(map[string]metaArchive)
// File carries the metadata describing a single file and maps each field to
// a lower-case JSON key. The type is not used anywhere else so far.
type File struct {
	// ID is the numeric identifier, serialised as "id".
	ID int32 `json:"id"`
	// Name is the file name, serialised as "name".
	Name string `json:"name"`
	// Location is serialised as "location"; presumably it records where the
	// file is stored — confirm once the type is put to use.
	Location string `json:"location"`
	// Size is serialised as "size"; the unit is not specified in this file.
	Size int32 `json:"size"`
}
// publicEmailPattern matches a bare address of the form local@domain.tld.
// It is compiled once at package initialisation instead of on every call.
// Matching is case-insensitive and the top-level domain may be 2 to 63
// letters long, so addresses such as "User@Example.COM" or
// "curator@example.museum" are accepted.
var publicEmailPattern = regexp.MustCompile(`(?i)^[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,63}$`)

// emailIsValid checks that email is a well-formed address on a public domain.
//
// The input is first parsed with mail.ParseAddress, so forms carrying a
// display name ("Jane Doe <jane@example.org>") are accepted and reduced to
// the bare address. The bare address must then contain a domain with a
// top-level part (for example "user@localhost" is rejected).
//
// It returns the bare address and a nil error when the address is valid, or
// an empty string and a non-nil error otherwise.
func emailIsValid(email string) (string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", err
	}
	// Reject addresses whose domain has no dot-separated top-level part.
	if !publicEmailPattern.MatchString(parsed.Address) {
		return "", fmt.Errorf("email address must be public")
	}
	return parsed.Address, nil
}
// retrieveAllFiles gathers the names of every available file from the three
// kinds of source, in this order: the local directory, cloud storage, and
// storages connected via API. It is the single entry point for callers, so
// they do not need to change when a source is added or modified.
//
// A failure of the local listing returns a nil slice together with the error.
// A failure of the cloud or API listing returns the names collected up to
// that point together with the error.
func retrieveAllFiles() ([]string, error) {
	// Hard-wired switch: while it is true, cloud storage is queried even when
	// runfig.SourceBucketList is empty.
	forceCloud := true

	var names []string

	fromDisk, err := retrieveFilesLocal(config.SourceLocalDir)
	if err != nil {
		return nil, err
	}
	names = append(names, fromDisk...)

	if len(runfig.SourceBucketList) > 0 || forceCloud {
		fromCloud, cloudErr := retrieveFilesCloud(runfig.StorageClient, config)
		if cloudErr != nil {
			return names, cloudErr
		}
		names = append(names, fromCloud...)
	}

	fromAPI, err := retriveFilesAPI()
	if err != nil {
		return names, err
	}
	return append(names, fromAPI...), nil
}
// retrieveFilesLocal lists the files found in localSourceDir, a directory
// that can be read directly. When localSourceDir is the empty string no
// lookup is performed and an empty, non-nil slice is returned.
func retrieveFilesLocal(localSourceDir string) ([]string, error) {
	if localSourceDir != "" {
		return listFileDir(localSourceDir)
	}
	// No local directory configured: nothing to list.
	return []string{}, nil
}
// retrieveFilesCloud lists the objects stored directly under
// config.SourceBucketPrefix in the Google Cloud Storage bucket
// config.SourceBucketName and returns their names, each prefixed with
// "cloud/".
//
// The listing is bounded by a 10 second timeout. Because the query uses "/"
// as delimiter, only objects at the top level of the prefix are reported;
// the synthetic directory entries the iterator yields for deeper levels, and
// the object named exactly like the prefix, are skipped.
//
// On any iteration error it returns a nil slice and an error wrapping the
// underlying cause.
func retrieveFilesCloud(client *storage.Client, config *conf.ServerConfig) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Get the bucket handle and an iterator over all objects matching the query.
	it := client.Bucket(config.SourceBucketName).Objects(ctx, &storage.Query{
		Prefix:    config.SourceBucketPrefix,
		Delimiter: "/",
	})

	var cloudFiles []string
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("an error occured while retrieving a file from the cloud storage: %w", err)
		}
		// With a delimiter set, sub-"directories" come back as synthetic
		// entries that only carry Prefix (Name is empty); they are not files.
		if attrs.Prefix != "" {
			continue
		}
		// Make sure the directory itself is not listed as an available file.
		if attrs.Name == config.SourceBucketPrefix {
			continue
		}
		cloudFiles = append(cloudFiles, "cloud/"+attrs.Name)
	}
	return cloudFiles, nil
}
// retriveFilesAPI is the placeholder for listing files from storages that are
// connected via API. No such storage is defined yet, so it always returns an
// empty, non-nil slice and a nil error.
func retriveFilesAPI() ([]string, error) {
	noFiles := make([]string, 0)
	return noFiles, nil
}
// listFileDir returns the names of all entries in the directory dirPath,
// each prefixed with "local/". Sub-directories are not filtered out.
//
// If the directory cannot be read, the failure is logged and returned as an
// error wrapping the underlying cause, with a nil slice; the process is not
// terminated, so the caller decides how to react.
func listFileDir(dirPath string) ([]string, error) {
	entries, err := ioutil.ReadDir(dirPath)
	if err != nil {
		// Keep the log entry the previous implementation produced, but hand
		// the error back instead of exiting the whole program.
		log.Printf("listing directory %q: %v", dirPath, err)
		return nil, fmt.Errorf("listing directory %q: %w", dirPath, err)
	}

	var filenames []string
	for _, entry := range entries {
		filenames = append(filenames, "local/"+entry.Name())
	}
	return filenames, nil
}