-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.php
151 lines (115 loc) · 3.58 KB
/
util.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
<?php
/**
 * Build a lowercase slug from a string.
 *
 * Every run of characters other than ASCII letters, digits and hyphens is
 * collapsed into a single hyphen. Only surrounding whitespace is trimmed, so a
 * leading or trailing hyphen produced by the replacement is kept.
 *
 * @param string $string Text to convert.
 * @return string Lowercased, hyphen-separated slug.
 */
function slug($string) {
    $hyphenated = preg_replace('/[^A-Za-z0-9-]+/', '-', $string);
    $trimmed = trim($hyphenated);

    return strtolower($trimmed);
}
/**
 * Format an integer as a decimal string, left-padded with zeros.
 *
 * @param int $int     Value to format.
 * @param int $padding Minimum width of the result; 0 applies no padding.
 * @return string The formatted number.
 */
function pad_int($int, $padding = 0) {
    // Assemble a sprintf() format such as "%03d" from the requested width.
    $format = "%0{$padding}d";

    return sprintf($format, $int);
}
/**
 * Pause between requests so the remote server is not hammered.
 *
 * Despite the name, the delay is currently a fixed one second.
 */
function random_sleep() {
    $seconds = 1;
    sleep($seconds);
}
/**
 * Load a CSV file into an array keyed by the value of one column.
 *
 * Rows sharing the same key overwrite earlier ones. Blank lines and rows that
 * are too short to contain the key column are skipped (previously they were
 * silently stored under an empty-string key). Terminates the script when the
 * file is missing or cannot be opened.
 *
 * @param string $filename  Path of the CSV file.
 * @param int    $index     Zero-based column whose value becomes the array key.
 * @param string $delimiter Field delimiter.
 * @return array Map of key-column value => full row (list of fields); empty
 *               when the file holds no usable rows.
 */
function get_data_from_csv($filename, $index = 0, $delimiter = ',') {
    if (!file_exists($filename)) die("$filename does not exist.");

    $file = fopen($filename, 'r');
    if ($file === false) die("$filename could not be opened.");

    $csv = array();
    // Enclosure and escape are spelled out with the values the parser already
    // defaulted to, so parsing is unchanged while newer PHP versions no longer
    // complain about relying on the implicit escape character.
    while (($line = fgetcsv($file, 0, $delimiter, '"', '\\')) !== false) {
        // fgetcsv() reports a blank line as array(null); such lines and rows
        // lacking the key column cannot be indexed meaningfully.
        if ($line === array(null) || !array_key_exists($index, $line)) {
            continue;
        }
        $csv[$line[$index]] = $line;
    }
    fclose($file);

    return $csv;
}
/**
 * Split a UTF-8 string into consecutive chunks of $len characters.
 *
 * The final chunk may be shorter when the string length is not a multiple of
 * $len. Chunks no longer overlap: the cursor advances by the chunk size
 * rather than by one character.
 *
 * @param string $str UTF-8 encoded text.
 * @param int    $len Characters per chunk; values below 1 are treated as 1.
 * @return array List of chunks, empty for an empty string.
 */
function utf8_split($str, $len = 1)
{
    // A non-positive step would never advance the cursor.
    $step = max(1, (int) $len);

    $arr = array();
    $strLen = mb_strlen($str, 'UTF-8');
    for ($i = 0; $i < $strLen; $i += $step) {
        $arr[] = mb_substr($str, $i, $step, 'UTF-8');
    }

    return $arr;
}
/**
 * Return the Unicode code point of the first character of a UTF-8 string.
 *
 * The text is converted to UTF-32 so that characters outside the Basic
 * Multilingual Plane (for example emoji), which a two-byte UCS-2 unit cannot
 * hold, yield their real code point.
 *
 * @param string $u UTF-8 encoded text; only the first character is inspected.
 * @return int Code point of the first character, or 0 for an empty string.
 */
function utf8_ord($u) {
    $utf32 = mb_convert_encoding($u, 'UTF-32BE', 'UTF-8');
    if (!is_string($utf32) || strlen($utf32) < 4) {
        return 0;
    }

    // 'N' reads one unsigned 32-bit big-endian integer: the first code point.
    $decoded = unpack('N', substr($utf32, 0, 4));

    return $decoded[1];
}
/**
 * Wrap a value in double quotes.
 *
 * No escaping is applied to quotes already present in the text.
 *
 * @param string $text Text to wrap.
 * @return string The text surrounded by a pair of double quotes.
 */
function double_quote($text) {
    return "\"{$text}\"";
}
/**
 * Normalise a fragment of HTML text to a single tidy line.
 *
 * Steps, in order: decode HTML entities, turn UTF-8 non-breaking spaces into
 * regular spaces, collapse every run of whitespace into one space, and trim
 * the ends.
 *
 * @param string $text Raw text, possibly containing HTML entities.
 * @return string The cleaned-up text.
 */
function html_sanitize($text) {
    $decoded = html_entity_decode($text);

    // "\xC2\xA0" is the UTF-8 byte sequence of U+00A0 (no-break space).
    $plainSpaces = str_replace("\xC2\xA0", ' ', $decoded);

    $collapsed = preg_replace('/\s+/', ' ', $plainSpaces);

    return trim($collapsed);
}
/**
 * Prepare text for use inside a double-quoted CSV field.
 *
 * The text is first cleaned with html_sanitize() and then has its double
 * quotes doubled by csv_escape_double_quotes().
 *
 * @param string $text Raw text.
 * @return string Cleaned text with double quotes escaped.
 */
function csv_sanitize($text) {
    $clean = html_sanitize($text);

    return csv_escape_double_quotes($clean);
}
/**
 * Escape double quotes for CSV by doubling each one.
 *
 * @param string $text Text to escape.
 * @return string Text in which every " has become "".
 */
function csv_escape_double_quotes($text) {
    $quote = '"';
    $escapedQuote = '""';

    return str_replace($quote, $escapedQuote, $text);
}
/**
 * Write content to a file, creating its parent directories first.
 *
 * @param string $filename Destination path.
 * @param mixed  $content  Data handed to file_put_contents().
 * @return int|false Result of file_put_contents(): bytes written, or false on failure.
 */
function write($filename, $content) {
    make_directory_structure($filename);
    $bytesWritten = file_put_contents($filename, $content);

    return $bytesWritten;
}
/**
 * Read an entire file into a string.
 *
 * @param string $filename Path of the file to read.
 * @return string|false Result of file_get_contents(): the contents, or false on failure.
 */
function read($filename) {
    $contents = file_get_contents($filename);

    return $contents;
}
/**
 * Ensure the directory that will contain $filename exists.
 *
 * Missing parent directories are created recursively with mode 0777 (subject
 * to the process umask). An already existing directory is not an error, but a
 * genuine failure to create it now raises a warning instead of being hidden.
 *
 * @param string $filename Path of a file whose parent directory is required.
 * @return void
 */
function make_directory_structure($filename) {
    $directory = dirname($filename);
    if (is_dir($directory)) {
        return;
    }

    // mkdir() may fail merely because another process created the directory
    // in the meantime, so re-check before reporting a problem.
    if (!@mkdir($directory, 0777, true) && !is_dir($directory)) {
        trigger_error("Unable to create directory: $directory", E_USER_WARNING);
    }
}
/**
 * Tell whether the cache file for a URL is absent.
 *
 * @param string $filename Path of the cache file.
 * @return bool True when nothing exists at that path, false otherwise.
 */
function cached_url_does_not_exist($filename) {
    return file_exists($filename) ? false : true;
}
/**
 * Tell whether a cache file is absent.
 *
 * @param string $filename Path of the cache file.
 * @return bool True when nothing exists at that path, false otherwise.
 */
function cached_file_does_not_exist($filename) {
    return file_exists($filename) ? false : true;
}
/**
 * Return the cached contents of a file, populating the cache on a miss.
 *
 * When $filename already exists its contents are returned and $contents is
 * ignored. Otherwise $contents is written to $filename and returned. If no
 * contents were supplied (the default false), nothing is written, so a mere
 * lookup no longer leaves an empty file behind that later reads would treat
 * as a cache hit.
 *
 * @param string       $filename Path of the cache file.
 * @param string|false $contents Data to store on a miss; false means "none".
 * @return string|false Cached or newly stored contents; false on a miss
 *                      without contents.
 */
function cache_file($filename, $contents = false) {
    if (file_exists($filename)) {
        return file_get_contents($filename);
    }

    if ($contents !== false) {
        make_directory_structure($filename);
        file_put_contents($filename, $contents);
    }

    return $contents;
}
/**
 * Return the body of a URL, using a file on disk as a cache.
 *
 * When $filename already exists its contents are returned without any network
 * access. Otherwise the URL is fetched with get_url() and the result is
 * stored in $filename. A failed fetch is returned to the caller but not
 * stored, so a transient error does not become a permanent cache entry.
 *
 * @param string       $filename Path of the cache file.
 * @param string|false $url      URL to fetch on a cache miss.
 * @return string|false Cached or freshly fetched contents; on failure,
 *                      whatever get_url() returned.
 */
function cache_url($filename, $url = false) {
    if (file_exists($filename)) {
        return file_get_contents($filename);
    }

    $contents = get_url($url);

    // get_url() signals failure in-band with a string starting "cURL Error #:";
    // a non-string result means the transfer produced no body at all.
    $failed = !is_string($contents) || strpos($contents, 'cURL Error #:') === 0;
    if (!$failed) {
        make_directory_structure($filename);
        file_put_contents($filename, $contents);
    }

    return $contents;
}
/**
 * Fetch a URL with cURL using browser-like request headers.
 *
 * random_sleep() is called before every request to throttle traffic. The
 * transfer has a 30 second timeout and accepts any encoding cURL supports.
 *
 * NOTE(review): CURLOPT_MAXREDIRS is set but CURLOPT_FOLLOWLOCATION is not, so
 * redirects are presumably not followed - confirm whether that is intended.
 *
 * @param string $url Address to request.
 * @return string|bool Result of curl_exec() when cURL reports no error;
 *                     otherwise a string starting with "cURL Error #:"
 *                     followed by the cURL error message.
 */
function get_url($url) {
    random_sleep();

    $headers = array(
        "Accept: application/json, text/plain, */*",
        "Cache-Control: no-cache",
        "Connection: keep-alive",
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0",
    );
    $options = array(
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "GET",
        CURLOPT_HTTPHEADER => $headers,
    );

    $handle = curl_init();
    curl_setopt_array($handle, $options);
    $body = curl_exec($handle);
    $error = curl_error($handle);
    curl_close($handle);

    if (!$error) {
        return $body;
    }

    return "cURL Error #:" . $error;
}