protocol_types = {"icmp":0,
# Distinct values of field 3 (the service column, encoded via `services`) can be listed with:
#   cat kddcup.data_10_percent_corrected | cut -d"," -f3 | sort | uniq
services = {"auth":0,
# Distinct values of field 4 (the flag column, encoded via `flags`) can be listed with:
#   cat kddcup.data_10_percent_corrected | cut -d"," -f4 | sort | uniq
flags = {"OTH":0,
# Input file to preprocess; the output is written alongside it with a
# "preprocessed_" prefix.
filename = "normal"
new_filename = "preprocessed_" + filename

# Replace the three categorical columns (indexes 1-3: protocol type, service,
# flag) with their integer codes from the module-level lookup tables.
# The `with` block closes both files even when a lookup raises KeyError for
# an unknown value, so everything written so far reaches the disk.
with open(filename, "r") as source, open(new_filename, "w") as target:
    for line in source:
        line = line.replace("\n", "")
        if not line:
            # Skip blank lines (e.g. a trailing empty line); they would
            # otherwise fail with IndexError on tokens[1].
            continue
        tokens = line.split(",")
        tokens[1] = str(protocol_types[tokens[1]])
        tokens[2] = str(services[tokens[2]])
        tokens[3] = str(flags[tokens[3]])
        target.write(",".join(tokens) + "\n")

# The parenthesised single-argument form prints identical text on
# Python 2 and Python 3.
print("File created: " + new_filename)
