From 622769a0ed0beda916dc886d12f4e8070d9d06ca Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 2 Feb 2015 21:42:32 +0530 Subject: [PATCH] Adding some more utility python script --- Counter.py | 18 ++++++++++++++ README | 22 ++++++++++++++++- compare_files.py | 40 +++++++++++++++++++++++++++++++ count_all_mp3_files_on_machine.py | 24 +++++++++++++++++++ fetch_email_address.py | 23 ++++++++++++++++++ fetch_number_of_lines.py | 21 ++++++++++++++++ fetch_url_in_a_web_page.py | 17 +++++++++++++ list_all_specific_type_files.py | 22 +++++++++++++++++ read_textfile.py | 24 +++++++++++++++++++ search_text_file_for_string.py | 17 +++++++++++++ simple_downloader.py | 26 ++++++++++++++++++++ word_frequency_in_python.py | 23 ++++++++++++++++++ 12 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 Counter.py create mode 100644 compare_files.py create mode 100644 count_all_mp3_files_on_machine.py create mode 100644 fetch_email_address.py create mode 100644 fetch_number_of_lines.py create mode 100644 fetch_url_in_a_web_page.py create mode 100644 list_all_specific_type_files.py create mode 100644 read_textfile.py create mode 100644 search_text_file_for_string.py create mode 100644 simple_downloader.py create mode 100644 word_frequency_in_python.py diff --git a/Counter.py b/Counter.py new file mode 100644 index 00000000000..6546badbdc9 --- /dev/null +++ b/Counter.py @@ -0,0 +1,18 @@ +__author__ = 'tusharsappal' +## This script uses the dictionary utility of the python and counts the number of the characters in the string and their frequency +def Counter(str): + d=dict() + for c in str: + if c not in d: + d[c]=1 + else : + d[c]=d[c]+1 + + return d + + +## Just replace the method argument with the string for which you want to check + + +temp=Counter("Replace with the string for which you want to check ") +print temp \ No newline at end of file diff --git a/README b/README index c067f4eb046..ce8e4a6bfe1 100644 --- a/README +++ b/README @@ -34,4 +34,24 @@ script_count.py - This scans my scripts directory and gives a count of the diffe script_listing.py - This will list all the files in the given directory, it will also go through all the subdirectories as well -testlines.py - This very simple script open a file and prints out 100 lines of whatever is set for the line variable \ No newline at end of file +testlines.py - This very simple script open a file and prints out 100 lines of whatever is set for the line variable + +compare_files.py - This script compares two text files + +count_all_mp3_files_on_machine.py - This script aims to provide a count of mp3 files present in a given directory + +fetch_email_address.py - This script fetches the email address from the string provided + +fetch_number_of_lines.py - This script fetches the number of lines from the text file provided + +fetch_url_in_a_web_page.py - This script fetches the url/ urls present in a webpage + +list_all_specific_type_files.py - This script list all the specific file types present in a directory + +read_textfile.py - This script aims to read text file provided as an argument + +search_text_file_for_string.py - This script aims to search for a string present in a text file + +simple_downloader.py - This script aims to simulate a simple down loader + +word_frequency.py - This script prints the word frequency in the text provided as an argument diff --git a/compare_files.py b/compare_files.py new file mode 100644 index 00000000000..1e09e0af824 --- /dev/null +++ b/compare_files.py @@ -0,0 +1,40 @@ +## this script compares the two text files +## first line of defence is to match the number of line in the file +## second line of defence is to store the content in two arrays and then compare +__author__ = 'tusharsappal' + +def compare_two_files(str_1,str_2): + print "Comparing starts" + print"First fetching the number of line" + with open(str_1) as fin : + lines = sum (1 for line in fin) + with open(str_2) as fin_2: + lines_2 = sum(1 for line in fin_2) + + if(lines== lines_2): + print "First Line of defence is passed" + ins = open (str_1,"r") + array = [] + for line_1 in ins: + array.append(line_1) + ins.close() + ins_2 = open (str_2,"r") + array_2=[] + for line_2 in ins_2: + array_2.append(line_2) + ins_2.close() + if (array==array_2): + print "In the Second Line of Defence the files have passed" + else : + print "In the second line of defence the files have failed" + + + else : + print "First line of defence is not passed" + + + +## replace the first parameter with the first file path and the second paramter with the second file path +compare_two_files("path to the first file ","path to the second file") + + diff --git a/count_all_mp3_files_on_machine.py b/count_all_mp3_files_on_machine.py new file mode 100644 index 00000000000..6f8d00fc6df --- /dev/null +++ b/count_all_mp3_files_on_machine.py @@ -0,0 +1,24 @@ +__author__ = 'tusharsappal' + + +import os + +## This script fetches the count of the total number of mp3 [Matter of fact you can change the type of the files to be fetched by changing the endswith type]files in the listed directory + +def count_all_mp3_files_on_machine(): + count = 0 + for (dirname,dirs,files) in os.walk('Replace the directory path where to serach like C:/ or / on UNIX like machines'): + for filename in files: + if filename.endswith(".mp3"): + count =count+1 + thefile = os.path.join(dirname,filename) + print "The Name is ",thefile, "and the size is ", os.path.getsize(thefile) + + + + + + print "The total number of mp3 files on the system are ",count + + +count_all_mp3_files_on_machine() \ No newline at end of file diff --git a/fetch_email_address.py b/fetch_email_address.py new file mode 100644 index 00000000000..491826148e2 --- /dev/null +++ b/fetch_email_address.py @@ -0,0 +1,23 @@ + +__author__ = 'tusharsappal' + + +## This script fetches the email addresses from the string provided , the script only fetches the email addresses of the format alphanumeric@alphabets + +import re +def fetch_email_address(str): + fetcher=re.findall('[a-zA-Z0-9]\S+@\S+[a-zA-Z]]',str) + if len(fetcher)>0: + print fetcher + + + +## Replace the method argument with the string to be parsed, or you can modify the script to read the data from the text file + + +fetch_email_address("Replace this argument with the string to be parsed") + + + + + diff --git a/fetch_number_of_lines.py b/fetch_number_of_lines.py new file mode 100644 index 00000000000..f61a7ff7ddb --- /dev/null +++ b/fetch_number_of_lines.py @@ -0,0 +1,21 @@ +## fetching the number of lines from a text file + +__author__ = 'tusharsappal' +def fetch_number_of_lines(str): + print "Fetching starts from this point" + with open(str) as fin: + lines = sum (1 for line in fin) + print "Number of lines is " + print lines + + + + + +## replace the argument in the function call to the path of the file + +fetch_number_of_lines("The path to the text file") + + + + diff --git a/fetch_url_in_a_web_page.py b/fetch_url_in_a_web_page.py new file mode 100644 index 00000000000..c52562271e8 --- /dev/null +++ b/fetch_url_in_a_web_page.py @@ -0,0 +1,17 @@ +import urllib +import re +__author__ = 'tusharsappal' + +## Enter the name of the url and this small snippet will fetch all the email links present in that web page +def find_hyper_links_in_page(): + url = raw_input("Enter the url to be searched--") + html =urllib.urlopen(url).read() + links =re.findall('href="(http://.*?)"', html) + for link in links: + print link + + + + + +find_hyper_links_in_page() diff --git a/list_all_specific_type_files.py b/list_all_specific_type_files.py new file mode 100644 index 00000000000..91844ebb63a --- /dev/null +++ b/list_all_specific_type_files.py @@ -0,0 +1,22 @@ +__author__ = 'tusharsappal' +import os + +## This program searches in the current working directory and lists all the python files + + + +def print_the_list_of_python_files(): + cwd = os.getcwd() + print "The current working directory is ", cwd + cwd_2 = os.listdir(cwd) + for file in cwd_2: + if file.endswith(".py"): ## To change the type of the file fetched change the ends with to the specific file name + + print "The pyhton file is ", file + + + + + +print_the_list_of_python_files() + diff --git a/read_textfile.py b/read_textfile.py new file mode 100644 index 00000000000..5abf3f8254f --- /dev/null +++ b/read_textfile.py @@ -0,0 +1,24 @@ +# declaring a global array_store to store the data temporarily +__author__ = 'tusharsappal' +array_store=[] +def readFile(str): + print "reading starts from this part" + f=open(str,"r") + global array_store + + for line in f: + line=line.rstrip('\n') + array_store.append(line) + + for l in array_store: + print l + + return ; + + +# enter the path in side the readFile method call statement +readFile("Enter the text file path separated by / slashes ") + + + + diff --git a/search_text_file_for_string.py b/search_text_file_for_string.py new file mode 100644 index 00000000000..80ea5bdf128 --- /dev/null +++ b/search_text_file_for_string.py @@ -0,0 +1,17 @@ +## This script searches a particular string in the text file +__author__ = 'tusharsappal' +def read_file(str,string_to_be_searched): + print "The reading starts here" + with open(str,"r") as input_data: + for line in input_data: + if line.find(string_to_be_searched)>-1 : + print "String found" + break + + + +## replace the first argument with the file path separated with forward slashes and second argument with string to be searched +read_file("path_to_the_text_file","string_to_be_searched") + + + diff --git a/simple_downloader.py b/simple_downloader.py new file mode 100644 index 00000000000..1b5e0a904b1 --- /dev/null +++ b/simple_downloader.py @@ -0,0 +1,26 @@ +__author__ = 'tusharsappal' + +import urllib +def simple_downloader(url,path_to_store_content): + img = urllib.urlopen(url) + fhand=open(path_to_store_content,'w') + size=0 + while True: + info=img.read(10000) + if len(info)<1: + break + else : + size=size+len(info) + fhand.write(info) + + + fhand.close() + + + +##replace the first function parameter with the path of the the url from which the content is to be fetched +## And the second parameter with the path on your local system to store the content fetched + +simple_downloader("Replace with the URL","Replace with the path on your local system where we want to store the content") + + diff --git a/word_frequency_in_python.py b/word_frequency_in_python.py new file mode 100644 index 00000000000..38727a857bb --- /dev/null +++ b/word_frequency_in_python.py @@ -0,0 +1,23 @@ +__author__ = 'tusharsappal' +## This script basically fetches the file from the user specified location and finds the frequency of each word in the text file +def word_frequency_counter(): + fname=raw_input("Enter the Name of the file : ") + try: + fhand = open(fname) + except: + print("The file cannot be opened",file) + exit() + + counts= dict() + for line in fhand: + words = line.split() + for word in words: + if word not in counts: + counts[word]=1 + else: + counts[word]=counts[word]+1 + + + print counts + +word_frequency_counter() \ No newline at end of file