/
iotr.py
220 lines (172 loc) · 6.41 KB
/
iotr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#!/usr/bin/env python
# encoding: utf-8
"""
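iotr.py
Fill in the ID3 tags (description, release date, podcast identifier) of
downloaded "In Our Time" archive episodes from a saved episode listing.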
Created by Dave Race on 2013-03-30.
Copyright (c) 2013 confusedpublic. All rights reserved.
"""
import os
import sys
import id3reader
import string
from mutagen.id3 import ID3, TIT2, TDRL, TDES, WFED, TGID, TPUB
"""
Functions
"""
# Function to change a date in the form Day, d Mon YY into YYYY-MM-DD
def fixdate(date_str, pivot_year=12):
    """Turn a date such as "Thu, 4 Oct 12" into "2012-10-04".

    date_str is split on whitespace into four fields: day name, day of the
    month, English month abbreviation and year. The day name is ignored.

    A one- or two-digit year greater than pivot_year is placed in the 1900s;
    any other one- or two-digit year is placed in the 2000s. A year written
    with more digits is used as it stands.

    Raises KeyError for an unknown month abbreviation, ValueError for a day
    or year that is not a number, and IndexError when fields are missing.
    """
    # Month abbreviation -> two-digit month number.
    month_d = {
        "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04",
        "May": "05", "Jun": "06", "Jul": "07", "Aug": "08",
        "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12",
    }
    # split() without an argument collapses runs of whitespace, so a
    # space-padded day ("Thu,  4 Oct 12") still produces four clean fields.
    date_data = date_str.split()
    month = month_d[date_data[2]]
    # The day can arrive as unicode; str -> float -> int normalises it before
    # it is zero-padded to two digits.
    day = "%02d" % int(float(str(date_data[1])))
    # Compare the year as a number: comparing strings only gives the right
    # answer when both sides happen to be exactly two digits long.
    year = int(date_data[3])
    if year < 100:
        if year > pivot_year:
            year += 1900
        else:
            year += 2000
    return "%04d-%s-%s" % (year, month, day)
"""
Code
"""
# Set up the variables...
# General podcast directory; the archive's name is appended to this prefix.
gen_podcasts_dir = "/Users/DAVE/Music/Podcasts/In Our Time Archive_ "
# Every archive this script can work on: short code -> archive name.
# The short code names the listing file (<code>.txt) and prefixes the episode
# titles; the archive name completes the directory and the podcast identifier.
archive_names = {
    "iots": "Science",
    "iotc": "Culture",
    "ioth": "History",
    "iotp": "Philosophy",
    "iotr": "Religion",
}
# Decide which archive to edit by changing this one code.
iot = "iotr"
podcast_dir = gen_podcasts_dir + archive_names[iot] + "/"
podcast_ident = "In Our Time Archive: " + archive_names[iot]
info_f_path = iot + ".txt"
# Read the saved episode listing: UTF-8 text in which each episode's run of
# lines ends with a line that is exactly "</li>".
with open(info_f_path, "r") as ep_file:
    # The with-block closes the file even when decoding a line fails.
    ep_block = [line.decode('utf-8').strip() for line in ep_file]

# Group the lines into one list per episode. The "</li>" line closes a group
# and is kept as that group's last element.
episodes = []
pending = []
for line in ep_block:
    pending.append(line)
    if line == "</li>":
        episodes.append(pending)
        pending = []
# Whatever is still in `pending` came after the final "</li>" (a trailing
# blank line, a closing tag) and belongs to no episode, so it is dropped;
# filling pre-counted slots by index raised IndexError on such input.

# One (initially empty) dictionary of processed details per episode.
ep_info = [{} for _ in episodes]
# Typographic characters found in the listing, each paired with the plain
# stand-in it is swapped for by unicode.translate().
trans_table = dict(
    (ord(fancy), plain)
    for fancy, plain in (
        (u"’", u"'"),
        (u"‘", u"'"),
        (u"–", u"-"),
        (u"é", u"e"),
        (u"“", u'"'),
        (u"”", u'"'),
    )
)
# Punctuation that is replaced by "_" when guessing a file name.
fix_fnames_punc = ["'", ":"]

# Positions, within one episode's list of lines, of the fields that are kept.
title_at, date_at, descript_at = 2, 5, 12
for index, lines in enumerate(episodes):
    details = ep_info[index]
    line_count = len(lines)
    # A field is only filled in when the episode has that many lines.
    if line_count > title_at:
        details["title"] = iot.upper() + ": " + lines[title_at]
        details["filename"] = iot.upper() + "_ " + lines[title_at] + ".mp3"
    if line_count > date_at:
        details["date"] = fixdate(lines[date_at])
    if line_count > descript_at:
        details["description"] = lines[descript_at]
#print('\n'.join('{}: {}'.format(*k) for k in enumerate(ep_info)))
# Write the collected details into each episode file's ID3 tags.
# Known gap: an episode whose file name on disk really contains "é" is not
# found, because trans_table flattens it to "e".


def _disk_name(filename, punctuation):
    """Guess the on-disk file name for an episode.

    Typographic characters are mapped through trans_table, every string in
    punctuation is replaced by "_", and a generic "IOT_ " prefix is removed.
    """
    f_name = filename.translate(trans_table)
    for mark in punctuation:
        f_name = f_name.replace(mark, "_")
    # A fudge for episodes named in general rather than for a specific archive.
    return f_name.replace("IOT_ ", "")


for i, episode in enumerate(ep_info):
    # An episode with too few lines in the listing lacks some of these keys;
    # say so directly instead of reporting it as a missing file.
    missing = [key for key in ("filename", "description", "date")
               if key not in episode]
    if missing:
        print("WARNING!:\nIncomplete listing entry for podcast "
              + episode.get("title", "(no title)")
              + "\nMissing: " + ", ".join(missing)
              + "\n Index: " + str(i) + "\n WARNING!")
        continue

    # First guess: both ' and : became "_" on disk. Second guess: the
    # apostrophe survived in the file name and only : was replaced.
    candidates = []
    for punctuation in (fix_fnames_punc, [":"]):
        podcast_path = podcast_dir + _disk_name(episode["filename"], punctuation)
        if podcast_path not in candidates:
            candidates.append(podcast_path)

    pod_info = None
    for podcast_path in candidates:
        try:
            # Use mutagen to open the file info.
            pod_info = ID3(podcast_path)
        except Exception:
            # Could not load tags from this guess; move on to the next one.
            # Unlike a bare except, this lets Ctrl-C through.
            continue
        break

    if pod_info is None:
        # Give up; blame it on not finding the file. Nothing is saved, so no
        # stale tag object from an earlier episode gets written again.
        print("WARNING!:\nCouldn't find the file for podcast " + episode["title"]
              + "\nUsed filename: " + episode["filename"]
              + "\n Index: " + str(i) + "\n WARNING!")
        continue

    # Update the description; have to translate any unicode away.
    descript = episode["description"].translate(trans_table)
    pod_info.add(TDES(encoding=0, text=descript))
    # Update the release time.
    pod_info.add(TDRL(encoding=0, text=episode["date"]))
    # Update the podcast identifier.
    pod_info.add(TGID(encoding=0, text=podcast_ident))
    pod_info.save()

    # Show what was written. A single parenthesised argument prints the same
    # under the Python 2 print statement and the print() function.
    print("Updated " + episode["title"] + " with the following info:\n"
          + "Description: " + str(pod_info["TDES"]) + "\n"
          + "Released Date: " + str(pod_info["TDRL"]) + "\n"
          + "Podcast Identity: " + podcast_ident)