Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Ubuntu Packages #31

Closed
wants to merge 2 commits into from

2 participants

@mrshu
Owner

This pull request adds Ubuntu packages as fathead.

mrshu added some commits
@mrshu mrshu added ubuntu packages as fathead 11fa7e0
@mrshu mrshu Parsing part rewritten
Fixes:

- moving tags ([.*]) to the end
- removing version properly
ecf6f63
@rpicard

Closing since we're working on this from kind of a different angle.

@rpicard rpicard closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jul 12, 2012
  1. @mrshu

    added ubuntu packages as fathead

    mrshu authored
Commits on Jul 28, 2012
  1. @mrshu

    Parsing part rewritten

    mrshu authored
    Fixes:
    
    - moving tags ([.*]) to the end
    - removing version properly
This page is out of date. Refresh to see the latest.
View
6 ubuntu_pkgs/README.txt
@@ -0,0 +1,6 @@
+Ubuntu Packages plugin for DuckDuckGo
+
+Dependencies:
+
+Python 2.7
+gzip (should be included in Python)
View
3  ubuntu_pkgs/fetch.sh
@@ -0,0 +1,3 @@
#!/bin/bash
# Download the gzipped plain-text list of every package in Ubuntu "precise".
#
# wget flags:
#   -q           no progress output
#   -P download  save the file under the download/ directory
#   -N           timestamping: only re-fetch when the remote copy is newer

# Abort on the first failing command instead of carrying on.
set -e

# Work from this script's own directory so download/ is always created next
# to the script, regardless of where the script is invoked from.
cd "$(dirname "$0")"

wget -q -P download -N 'http://packages.ubuntu.com/precise/allpackages?format=txt.gz'
View
15 ubuntu_pkgs/meta.txt
@@ -0,0 +1,15 @@
+# This is the name of the source as people would refer to it, e.g. Wikipedia or PerlDoc
+Name: Ubuntu Packages
+
+# This is the base domain where the source pages are located.
+Domain: packages.ubuntu.com
+
+# This is what gets put in quotes next to the source
+# It can be blank if it is a source with completely general info spanning many types of topics like Facebook.
+Type: Ubuntu Package
+
+# Whether the source is from MediaWiki (1) or not (0).
+MediaWiki: 0
+
+# Keywords used to trigger (or prefer) the source over others.
+Keywords: ubuntu package,deb
View
90 ubuntu_pkgs/parse.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import os
+import re
+import gzip
+import string
+
# Show INFO-level progress messages (one per parsed / written package).
logging.basicConfig(level=logging.INFO)
# Named module logger instead of the root logger, so records identify their
# origin and other code can tune this module's verbosity independently.
logger = logging.getLogger(__name__)
+
class Package(object):
    """Hold the name, description and URL of a single Ubuntu package.

    ``str(package)`` renders the package as one tab-separated record with
    eight columns: page, namespace, url, description, synopsis, details,
    type and lang.  Only page, url and description carry data; type is
    always ``'A'`` and the remaining columns are left empty.
    """

    def __init__(self, name, info, reference):
        """Store the package name, its description and its URL.

        name      -- package name (used as the page title)
        info      -- human-readable description
        reference -- URL of the package page
        """
        self.name = name
        self.info = info
        self.reference = reference

    def __repr__(self):
        return 'Package(%r, %r, %r)' % (self.name, self.info, self.reference)

    @staticmethod
    def _clean(field):
        """Return *field* with tabs and line breaks replaced by spaces.

        A raw tab would shift every following column and a raw line break
        would split the record in two, so neither may appear inside a field.
        """
        for separator in ('\t', '\r', '\n'):
            field = field.replace(separator, ' ')
        return field

    def __str__(self):
        """Return the package as a single tab-separated output record."""
        fields = [
            self.name,       # $page
            '',              # $namespace
            self.reference,  # $url
            self.info,       # $description
            '',              # $synopsis (code)
            '',              # $details
            'A',             # $type
            '',              # $lang
        ]
        return '\t'.join(self._clean(field) for field in fields)
+
+
class Parser(object):
    """Parse the gzipped plain-text Ubuntu package list into Package objects.

    Each package line is expected to look like one of::

        name (version) [tag] description
        name (version) description
        name description

    The version is dropped, a leading ``[tag]`` is moved to the end of the
    description, and any other ``[...]`` group is removed.
    """

    UBUNTU_PKGS_URL = 'http://packages.ubuntu.com'

    # Number of preamble lines skipped before the first package line.
    HEADER_LINES = 6

    # The name is the first space-free token; requiring the parenthesised
    # version right after it keeps a '(' later in the description from
    # being mistaken for the version.
    _VERSIONED = re.compile(r'(\S+) \(.*?\) (.*)')
    _LEADING_TAG = re.compile(r'\[(.*?)\] (.*)')
    _ANY_TAG = re.compile(r'\[.*?\]')
    _PRINTABLE = frozenset(string.printable)

    def __init__(self, input='download/allpackages?format=txt.gz'):
        """Open the gzipped list at *input* and skip its preamble lines."""
        self.input = gzip.open(input, 'rb')

        for _ in range(self.HEADER_LINES):
            self.input.readline()

    def close(self):
        """Close the underlying gzip file."""
        self.input.close()

    @classmethod
    def _parse_line(cls, line):
        """Split one list line into ``(name, info)``.

        Accepts bytes or text.  Returns ``None`` for a blank line.
        """
        # gzip hands back bytes; work on text so the string operations
        # below behave the same on Python 2 and Python 3.
        if isinstance(line, bytes):
            line = line.decode('utf-8', 'replace')

        line = line.rstrip('\r\n')
        if not line.strip():
            return None

        versioned = cls._VERSIONED.match(line)
        if versioned:
            name, info = versioned.groups()
        else:
            # No "(version)" after the name: everything after the first
            # space is the description.
            name, _, info = line.partition(' ')

        # fix for agda-bin package; removing non-ascii characters
        info = ''.join(ch for ch in info if ch in cls._PRINTABLE)

        if '[' in info:
            tagged = cls._LEADING_TAG.match(info)
            if tagged:
                # Move a leading "[tag]" behind the description.
                tag, text = tagged.groups()
                info = text + ' [' + tag + ']'
            else:
                info = cls._ANY_TAG.sub('', info)

        return name, info

    def get_packages(self):
        """Read the remaining lines and build ``self.packages``.

        Blank lines are skipped.  The resulting list of Package objects is
        stored on ``self.packages`` and also returned.
        """
        self.packages = []
        for line in self.input:
            parsed = self._parse_line(line)
            if parsed is None:
                continue

            name, info = parsed
            reference = self.UBUNTU_PKGS_URL + '/' + name

            self.packages.append(Package(name, info, reference))
            logger.info('Parsed package %s', name)

        return self.packages
+
def main():
    """Parse the downloaded package list and write it to ``output.txt``."""
    parser = Parser()
    try:
        parser.get_packages()
    finally:
        # The parser keeps its gzip handle open; release it once parsing
        # is over, even if parsing failed.
        parser.input.close()

    # Binary mode plus an explicit UTF-8 encode works on Python 2 and 3.
    # Appending '\n' to the already-encoded record and writing it to a
    # text-mode file fails on Python 3.
    with open('output.txt', 'wb') as output:
        for package in parser.packages:
            record = package.__str__() + '\n'
            output.write(record.encode('utf-8'))
            logger.info('Package added to output: %s', package.name)


if __name__ == '__main__':
    main()
View
2  ubuntu_pkgs/parse.sh
@@ -0,0 +1,2 @@
#!/bin/bash
# Run the package-list parser.
#
# parse.py reads download/allpackages?format=txt.gz and writes output.txt,
# both relative to the working directory, so switch to this script's own
# directory first to make the result independent of the caller's location.

# Abort on the first failing command instead of carrying on.
set -e

cd "$(dirname "$0")"

python parse.py
View
0  ubuntu_pkgs/queries.txt
No changes.
Something went wrong with that request. Please try again.