Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# gemfileparser
Parse Ruby Gemfile's using Python. Supports Gemfiles and .gemspec files.
# gemfileparser2
Parse Ruby Gemfiles using Python. Supports Gemfiles, .gemspec files and CocoaPods (.podspec) files. A friendly fork of https://gitlab.com/balasankarc/gemfileparser.

[gemfileparser](https://gitlab.com/balasankarc/gemfileparser) can only detect particular types of dependency declarations in `.gemspec` files: those written as `s.add_development_dependency "rspec", "~>1.3.1"` or `s.add_runtime_dependency "rspec", "~>1.3.1"`. Dependencies must be declared in one of these two formats to be detected.
[gemfileparser2](https://github.com/nexB/gemfileparser2) can detect all formats of dependency declarations. This fork supports Gemfiles, .gemspec files and CocoaPods (.podspec) files.

### Installation
If using pip, use the command `sudo pip install gemfileparser`
If using pip, use the command `sudo pip install gemfileparser2`
Else use the following commands
```
git clone https://github.com/balasankarc/gemfileparser.git
cd gemfileparser
git clone https://github.com/nexB/gemfileparser2.git
cd gemfileparser2
python setup.py install
```

### Usage
```
from gemfileparser import GemfileParser
from gemfileparser2 import GemfileParser
parser = GemfileParser(<path to Gemfile>, <name of the application (optional)>)
dependency_dictionary = parser.parse()
```
Expand All @@ -37,7 +40,7 @@ group - Group in which gem is a member of (default : runtime)

#### Example
```
from gemfileparser import GemfileParser
from gemfileparser2 import GemfileParser
n = GemfileParser('Gemfile', 'diaspora')
deps = n.parse()
for key in deps:
Expand Down
165 changes: 89 additions & 76 deletions gemfileparser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

'''Python library to parse Ruby's Gemfiles and gemspec files.'''
"""
Python library to parse Ruby's Gemfiles and gemspec files.
"""

import csv
import io
Expand All @@ -25,65 +27,51 @@


class GemfileParser(object):

'''Creates a GemfileParser object to perform operations. '''
"""
Creates a GemfileParser object to perform operations.
"""

class Dependency(object):

''' A class to hold information about a dependency gem.'''
"""
A class to hold information about a dependency gem.
"""

def __init__(self):
self.name = None
self.name = ''
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use empty strings instead of None?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the repo we forked is some commits behind. The empty strings here come from the original changes in the upstream repo, and maybe they help when extracting data from Gemfiles.

self.requirement = []
self.autorequire = None
self.source = None
self.autorequire = ''
self.source = ''
self.parent = []
self.group = None
self.platform = None
self.platforms = []
self.groups = []
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you removed these attributes because they are either not in the spec or seldom used or parsable?
`platform` is still a thing, though it is rarely present: https://guides.rubygems.org/specification-reference/#platform=
Can you confirm?

I never saw a group in a spec so far... did you?

Copy link
Author

@rpotter12 rpotter12 Jul 21, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they are used in Gemfiles. gemfileparser handles both gemspec files and Gemfiles, but I think @balasankarc has implemented these in another way, so he removed them in the original repository :)


def __str__(self):
attributes = self.__dict__
output = {}
for key, value in attributes.items():
if value is None or value == []:
next
else:
output[key] = value
return str(output)
self.group = ''

gemfile_regexes = collections.OrderedDict()
gemfile_regexes['name'] = re.compile(r"gem ['\"](?P<name>.*?)['\"]")
gemfile_regexes['source'] = re.compile(
r".*source(:|[ ]?=>)[ ]*['\"](?P<source>[a-zA-Z:\/\.-\\]+)['\"].*")
r'source:[ ]?(?P<source>[a-zA-Z:\/\.-]+)')
gemfile_regexes['git'] = re.compile(
r".*git(:|[ ]?=>)[ ]*(?P<git>[a-zA-Z:\/\.-]+).*")
r'git:[ ]?(?P<git>[a-zA-Z:\/\.-]+)')
gemfile_regexes['platform'] = re.compile(
r".*platform(:|[ ]?=>)[ ]*(?P<platform>[a-zA-Z:\/\.-]+).*")
gemfile_regexes['platforms'] = re.compile(
r".*platforms(:|[ ]?=>)[ ]*(?P<platforms>\[.*\])[,]?.*")
r'platform:[ ]?(?P<platform>[a-zA-Z:\/\.-]+)')
gemfile_regexes['path'] = re.compile(
r".*path(:|[ ]?=>)[ ]*(?P<path>.+['\"\)]).*")
gemfile_regexes['github'] = re.compile(
r".*github(:|[ ]?=>)[ ]*[\'\"](?P<github>[a-zA-Z:\/\.-0-9]+)[\'\"].*")
r'path:[ ]?(?P<path>[a-zA-Z:\/\.-]+)')
gemfile_regexes['branch'] = re.compile(
r".*branch(:|[ ]?=>)[ ]*(?P<branch>[a-zA-Z:\/\.-]+).*")
r'branch:[ ]?(?P<branch>[a-zA-Z:\/\.-]+)')
gemfile_regexes['autorequire'] = re.compile(
r".*require(:|[ ]?=>)[ ]*(?P<autorequire>[a-zA-Z:\/\.-]+).*")
r'require:[ ]?(?P<autorequire>[a-zA-Z:\/\.-]+)')
gemfile_regexes['group'] = re.compile(
r".*group(:|[ ]?=>)[ ]*(?P<group>[a-zA-Z:\/\.-]+).*")
gemfile_regexes['groups'] = re.compile(
r".*groups(:|[ ]?=>)[ ]*(?P<groups>\[.*\]),.*")
r'group:[ ]?(?P<group>[a-zA-Z:\/\.-]+)')
gemfile_regexes['name'] = re.compile(
r'(?P<name>[a-zA-Z]+[\.0-9a-zA-Z _-]*)')
gemfile_regexes['requirement'] = re.compile(
r"gem[ ]['\"].*?['\"](?P<requirement>([>|<|=|~>|\d]+[ ]*[0-9\.\w]+[ ,]*)+).*")
r'(?P<requirement>([>|<|=|~>|\d]+[ ]*[0-9\.\w]+[ ,]*)+)')
global_group = 'runtime'
group_block_regex = re.compile(
r".*group[ ]?(:|[ ]?=>)[ ]*(?P<groupblock>.*?) do")
r'group[ ]?:[ ]?(?P<groupblock>.*?) do')
add_dvtdep_regex = re.compile(
r".*add_development_dependency (?P<line>.*)")
r'.*add_development_dependency(?P<line>.*)')
add_rundep_regex = re.compile(
r".*add_runtime_dependency (?P<line>.*)")
r'.*add_runtime_dependency(?P<line>.*)')
add_dep_regex = re.compile(
r'.*dependency(?P<line>.*)')

def __init__(self, filepath, appname=''):
self.filepath = filepath # Required when calls to gemspec occurs
Expand All @@ -92,27 +80,33 @@ def __init__(self, filepath, appname=''):
self.dependencies = {
'development': [],
'runtime': [],
'dependency': [],
'test': [],
'production': [],
'metrics': []
}
self.contents = self.gemfile.readlines()
if filepath.endswith('gemspec'):
path = ('gemspec', 'podspec')
if filepath.endswith(path):
self.gemspec = True
else:
self.gemspec = False

@staticmethod
def preprocess(line):
'''Removes the comment portion and excess spaces.'''
"""
Return line after removing comment portion and excess spaces.
"""

if "#" in line:
if '#' in line:
line = line[:line.index('#')]
line = line.strip()
return line

def parse_line(self, line):
'''Parses each line and creates dependency objects accordingly'''
"""
Parses each line and creates dependency objects accordingly.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use the imperative style for docstrings.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will change it soon :)

"""

try:

Expand All @@ -124,29 +118,43 @@ def parse_line(self, line):
line = unicode(line)
except NameError:
pass
dep = self.Dependency()
dep.group = GemfileParser.global_group
if not self.appname:
dep.parent = []
else:
linefile = io.StringIO(line) # csv requires a file object
for line in csv.reader(linefile, delimiter=','):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remind me of the benefits of using the CSV module here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @balasankarc used the CSV module here because a dependency line contains the name and several version numbers separated by commas, so it has to be split on commas. If we used something else, the code needed to remove the extra characters would become more complex; the CSV module probably makes that easier. I guess using the CSV module might be the only good solution here.

column_list = []
for column in line:
stripped_column = column.replace("'", '')
stripped_column = stripped_column.replace('"', '')
stripped_column = stripped_column.replace('%q<', '')
stripped_column = stripped_column.replace('(', '')
stripped_column = stripped_column.replace(')', '')
stripped_column = stripped_column.replace('[', '')
stripped_column = stripped_column.replace(']', '')
stripped_column = stripped_column.strip()
column_list.append(stripped_column)
dep = self.Dependency()
dep.group = GemfileParser.global_group
dep.parent.append(self.appname)
# Check for a match in each regex and assign to
# corresponding variables
for criteria in GemfileParser.gemfile_regexes:
criteria_regex = GemfileParser.gemfile_regexes[criteria]
match = criteria_regex.match(line)
if match:
if criteria == 'requirement':
dep.requirement.append(match.group(criteria))
else:
setattr(dep, criteria, match.group(criteria))
if dep.group in self.dependencies:
self.dependencies[dep.group].append(dep)
else:
self.dependencies[dep.group] = [dep]
for column in column_list:
# Check for a match in each regex and assign to
# corresponding variables
for criteria in GemfileParser.gemfile_regexes:
criteria_regex = GemfileParser.gemfile_regexes[criteria]
match = criteria_regex.match(column)
if match:
if criteria == 'requirement':
dep.requirement.append(match.group(criteria))
else:
setattr(dep, criteria, match.group(criteria))
break
if dep.group in self.dependencies:
self.dependencies[dep.group].append(dep)
else:
self.dependencies[dep.group] = [dep]

def parse_gemfile(self, path=''):
'''Parses a Gemfile and returns a dict of categorized dependencies.'''
"""
Return dependencies after parsing gemfile.
"""

if path == '':
contents = self.contents
Expand All @@ -165,22 +173,21 @@ def parse_gemfile(self, path=''):
elif line.startswith('gemspec'):
# Gemfile contains a call to gemspec
gemfiledir = os.path.dirname(self.filepath)
gemspec_list = glob.glob(os.path.join(gemfiledir, "*.gemspec"))
gemspec_list = glob.glob(os.path.join(gemfiledir, '*.gemspec'))
if len(gemspec_list) > 1:
print("Multiple gemspec files found")
print('Multiple gemspec files found')
continue
elif len(gemspec_list) < 1:
print("No gemspec file found. Ignoring the gemspec call")
else:
gemspec_file = gemspec_list[0]
self.parse_gemspec(
path=os.path.join(gemfiledir, gemspec_file))
gemspec_file = gemspec_list[0]
self.parse_gemspec(path=os.path.join(gemfiledir, gemspec_file))
elif line.startswith('gem '):
line = line[3:]
self.parse_line(line)
return self.dependencies

def parse_gemspec(self, path=''):
'''Method to handle gemspec files.'''
"""
Return dependencies after parsing gemspec/podspec files.
"""

if path == '':
contents = self.contents
Expand All @@ -195,15 +202,21 @@ def parse_gemspec(self, path=''):
match = GemfileParser.add_rundep_regex.match(line)
if match:
GemfileParser.global_group = 'runtime'
else:
match = GemfileParser.add_dep_regex.match(line)
if match:
GemfileParser.global_group = 'dependency'
if match:
line = match.group('line')
self.parse_line(line)
return self.dependencies

def parse(self):
'''Calls necessary function based on whether file is a gemspec file
or not and forwards the dicts returned by them.'''
"""
Calls necessary function based on whether file is a gemspec/podspec file
or not and forwards the dicts returned by them.
"""
if self.gemspec:
return self.parse_gemspec()
else:
return self.parse_gemfile()
return self.parse_gemfile()
32 changes: 19 additions & 13 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,39 @@
from distutils.core import setup

config = {
'description': "Parse Ruby's Gemfiles",
'author': 'Balasankar C',
'url': 'https://gitlab.com/balasankarc/gemfileparser',
'download_url': 'https://gitlab.com/balasankarc/gemfileparser',
'author_email': 'balasankarc@autistici.org',
'description': "A library to parse Rubygem gemspec and Gemfile files and Cocoapods podspec and Podfile files using Python. Friendly fork of https://gitlab.com/balasankarc/gemfileparser",
'author': 'nexB',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep the original author name there, or use something like `Rohit Potter for nexB based on original work of Balasankar C` ... but in all cases we want Balasankar to get proper credit.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okh :)
I will change this in new commits.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case we get to collaborate and move development upstream as suggested in #2 (comment), this can definitely change to an AUTHOR file based logic so that every contributor gets credit. 🙂

'url': 'https://github.com/nexB/gemfileparser2',
'download_url': 'https://github.com/nexB/gemfileparser2',
'author_email': 'info@aboutcode.org',
'version': '0.7.0',
'license': 'GPL-3+ and MIT',
'long_description': '''
Introduction
~~~~~~~~~~~~
Parse Ruby Gemfile's using Python. Supports Gemfiles, .gemspec and Cocoapods(.podspec) files. Friendly fork of https://gitlab.com/balasankarc/gemfileparser.
https://gitlab.com/balasankarc/gemfileparser can only detect particular type of dependency in `.gemspec` files like it can detect only `s.add_development_dependency "rspec", "~>1.3.1"` or `s.add_runtime_dependency "rspec", "~>1.3.1"` type of dependency. Dependency should be in these 2 format only.
https://github.com/nexB/gemfileparser2 can detect all format of dependencies. This fork supports Gemfiles, .gemspec files and Cocoapods(.podspec) files.
Installation
~~~~~~~~~~~~
| If using pip, use the command ``sudo pip install gemfileparser``
| If using pip, use the command ``sudo pip install gemfileparser2``
| Else use the following commands
::
git clone https://github.com/balasankarc/gemfileparser.git
cd gemfileparser
git clone https://github.com/nexB/gemfileparser2.git
cd gemfileparser2
python setup.py install
Usage
~~~~~
::
from gemfileparser import GemfileParser
from gemfileparser2 import GemfileParser
parser = GemfileParser(<path to Gemfile>, <name of the application (optional)>)
dependency_dictionary = parser.parse()
Expand Down Expand Up @@ -61,7 +68,7 @@
::
from gemfileparser import GemfileParser
from gemfileparser2 import GemfileParser
n = GemfileParser('Gemfile', 'diaspora')
deps = n.parse()
for key in deps:
Expand All @@ -82,10 +89,9 @@
.. _GNU GPL version 3 (or above) License: http://www.gnu.org/licenses/gpl
''',
'install_requires': ['nose'],
'packages': ['gemfileparser'],
'packages': ['gemfileparser2'],
'scripts': [],
'name': 'gemfileparser'
'name': 'gemfileparser2'
}

setup(
Expand Down